
-----

# **`Python For Beginners`**

## **Author: Muhammad Adil Naeem**
## **Contact Me: `madilnaeem@gmail.com`**


------


![a1.PNG](attachment:a1.PNG)

![a2.PNG](attachment:a2.PNG)

### **`Pandas in Python`**

In [64]:
import pandas as pd

In [65]:
# Show which pandas version is installed in this kernel
pd.__version__

'2.2.1'

#### **`Series`**
- Create
- Read
- Update

![a3.PNG](attachment:a3.PNG)

In [66]:
# Build a Series from a plain Python list; no index is given,
# so pandas labels the values 0..5 by default.
arr = list(range(6))
s1 = pd.Series(data=arr)
s1

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [67]:
# Same values as `arr`, but labelled with an explicit integer index 1..6
order = list(range(1, 7))
S2 = pd.Series(data=arr, index=order)
S2

1    0
2    1
3    2
4    3
5    4
6    5
dtype: int64

In [68]:
# Creating a Series using numpy
import numpy as np

# A seeded Generator makes the displayed values identical on every run;
# the legacy np.random.randn draws from unseeded global state.
n = np.random.default_rng(seed=42).standard_normal(5)  # 5 samples from N(0, 1)
index = ['a', 'b', 'c', 'd', 'e']  # one label per value
pd.Series(n, index=index)

a   -0.638424
b    0.241568
c    1.151324
d   -0.788470
e    0.810182
dtype: float64

In [69]:
# Create a Series from a dictionary: the keys become the index labels
# and the values become the data.
d = dict(zip("abcde", range(5)))
pd.Series(data=d)

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [70]:
# Positional slice of a Series: take the elements at positions 2, 3 and 4.
# S2 is labelled 1..6, so these carry the labels 3, 4 and 5.
a = S2.iloc[2:5]
a

3    2
4    3
5    4
dtype: int64

#### **`Operations in Series`**

In [71]:
# Sample data for the Series operations below
arr1 = [0,1,2,3,4,5,7]
arr2 = [6,7,8,9,5]  # NOTE(review): not referenced in any visible cell — confirm it is still needed

In [72]:
# Wrap arr1 in a Series (default 0-based integer index) and display it
s5 = pd.Series(arr1)
s5

0    0
1    1
2    2
3    3
4    4
5    5
6    7
dtype: int64

In [73]:
# Print the median, maximum and minimum of s5, one "<label> <value>" line each
for label, value in (
    ("Median: ", s5.median()),
    ("Maximum: ", s5.max()),
    ("Minimum: ", s5.min()),
):
    print(label, value)

Median:  3.0
Maximum:  7
Minimum:  0


### **`Dataframe in Pandas`**


![a4.PNG](attachment:a4.PNG)

In [74]:
# Six consecutive daily timestamps, starting at the moment this cell runs
dates = pd.date_range(start='today', periods=6, freq='D')
dates

DatetimeIndex(['2024-10-01 19:23:46.596321', '2024-10-02 19:23:46.596321',
               '2024-10-03 19:23:46.596321', '2024-10-04 19:23:46.596321',
               '2024-10-05 19:23:46.596321', '2024-10-06 19:23:46.596321'],
              dtype='datetime64[ns]', freq='D')

In [75]:
# 6 rows x 4 columns of standard-normal samples.
# A seeded Generator replaces the unseeded np.random.randn so the values
# (and the DataFrame built from them) are the same on every run.
num_arr = np.random.default_rng(seed=42).standard_normal((6, 4))

num_arr

array([[-2.32881319, -1.28475324,  0.2371547 , -0.59196766],
       [ 0.630866  , -0.93447035,  0.0030034 ,  0.82116494],
       [-0.41685338,  0.5405404 , -0.11230371, -0.26644003],
       [ 0.80340029, -1.76134756, -0.37509064, -2.08023824],
       [ 1.39078632, -0.58162724,  0.61666348, -0.2350144 ],
       [-0.62837153, -0.92726904, -0.41217956,  2.604483  ]])

##### **`Creating a DataFrame`**

In [76]:
# Label the four columns A-D and use the timestamps in `dates` as the row index
columns = list('ABCD')
df = pd.DataFrame(data=num_arr, index=dates, columns=columns)
df

Unnamed: 0,A,B,C,D
2024-10-01 19:23:46.596321,-2.328813,-1.284753,0.237155,-0.591968
2024-10-02 19:23:46.596321,0.630866,-0.93447,0.003003,0.821165
2024-10-03 19:23:46.596321,-0.416853,0.54054,-0.112304,-0.26644
2024-10-04 19:23:46.596321,0.8034,-1.761348,-0.375091,-2.080238
2024-10-05 19:23:46.596321,1.390786,-0.581627,0.616663,-0.235014
2024-10-06 19:23:46.596321,-0.628372,-0.927269,-0.41218,2.604483


In [77]:
# Build a DataFrame from a dict of equal-length lists (one key per column);
# np.nan marks the two missing ages.

labels = list("abcdefghij")  # row labels

data = {
    "animal": ["cat", "cat", "snake", "dog", "dog", "cat", "snake", "cat", "dog", "dog"],
    "age": [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
    "visits": [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    "priority": ["yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no"],
}

df = pd.DataFrame(data=data, index=labels)
df

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [78]:
# Preview the first rows of the DataFrame (head() shows 5 rows here)

df.head()

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no


In [79]:
# Preview the last rows of the DataFrame (tail() shows 5 rows here)

df.tail()

Unnamed: 0,animal,age,visits,priority
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [80]:
# List the column names

df.columns

Index(['animal', 'age', 'visits', 'priority'], dtype='object')

In [81]:
# Check the data type (dtype) of each column

df.dtypes

animal       object
age         float64
visits        int64
priority     object
dtype: object

In [82]:
# Check the shape of the DataFrame as (rows, columns)

df.shape

(10, 4)

In [83]:
# Print a summary of the frame: index, columns, non-null counts, dtypes and memory usage

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   animal    10 non-null     object 
 1   age       8 non-null      float64
 2   visits    10 non-null     int64  
 3   priority  10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


In [84]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the DataFrame

df.describe()  # only the numeric columns (age, visits) are summarised

Unnamed: 0,age,visits
count,8.0,10.0
mean,3.4375,1.9
std,2.007797,0.875595
min,0.5,1.0
25%,2.375,1.0
50%,3.0,2.0
75%,4.625,2.75
max,7.0,3.0


In [85]:
# Transpose the data: rows become columns and columns become rows

df.T

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
animal,cat,cat,snake,dog,dog,cat,snake,cat,dog,dog
age,2.5,3.0,0.5,,5.0,2.0,4.5,,7.0,3.0
visits,1,3,2,3,2,3,1,1,2,1
priority,yes,yes,no,yes,no,no,no,yes,no,no


In [86]:
# Sort the rows by age in ascending order; rows with a missing age end up last
df.sort_values(by = "age")

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,no
f,cat,2.0,3,no
a,cat,2.5,1,yes
b,cat,3.0,3,yes
j,dog,3.0,1,no
g,snake,4.5,1,no
e,dog,5.0,2,no
i,dog,7.0,2,no
d,dog,,3,yes
h,cat,,1,yes


In [87]:
# Sort by age, then take the rows at positions 1 and 2 of the sorted frame
df.sort_values(by='age').iloc[1:3]

Unnamed: 0,animal,age,visits,priority
f,cat,2.0,3,no
a,cat,2.5,1,yes


In [88]:
# Select a subset of columns by passing a list of column labels

df[["animal", "age"]]

Unnamed: 0,animal,age
a,cat,2.5
b,cat,3.0
c,snake,0.5
d,dog,
e,dog,5.0
f,cat,2.0
g,snake,4.5
h,cat,
i,dog,7.0
j,dog,3.0


In [89]:
# Slice rows by integer position with .iloc (end excluded): positions 1 and 2

df.iloc[1:3]

Unnamed: 0,animal,age,visits,priority
b,cat,3.0,3,yes
c,snake,0.5,2,no


In [90]:
# Boolean mask of the same shape as df: True where a value is missing

df.isnull()

Unnamed: 0,animal,age,visits,priority
a,False,False,False,False
b,False,False,False,False
c,False,False,False,False
d,False,True,False,False
e,False,False,False,False
f,False,False,False,False
g,False,False,False,False
h,False,True,False,False
i,False,False,False,False
j,False,False,False,False


In [91]:
# Count the missing values in each column

df.isnull().sum()

animal      0
age         2
visits      0
priority    0
dtype: int64

In [92]:
# Mean of the age column; the two missing ages are left out of the calculation
df['age'].mean()

3.4375

In [93]:
# Largest value in the age column
df['age'].max()

7.0

In [94]:
# Let's work with strings: a Series of text values that includes one missing entry (np.nan)

string = pd.Series(['A','B','C','D','Aaa', 'BaCa', np.nan, 'CBA', 'cow', 'owl'])

string

0       A
1       B
2       C
3       D
4     Aaa
5    BaCa
6     NaN
7     CBA
8     cow
9     owl
dtype: object

In [95]:
# Convert every string in the Series to lower case via the .str accessor;
# the missing entry stays NaN

string.str.lower() 

0       a
1       b
2       c
3       d
4     aaa
5    baca
6     NaN
7     cba
8     cow
9     owl
dtype: object

In [96]:
# Convert every string in the Series to upper case via the .str accessor;
# the missing entry stays NaN

string.str.upper() 

0       A
1       B
2       C
3       D
4     AAA
5    BACA
6     NaN
7     CBA
8     COW
9     OWL
dtype: object

### **`Operations in Dataframe to Deal With Missing Values`**

In [97]:
# Take two copies of df to use in the missing-value examples below
data = df.copy()
data1 = df.copy()
data

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [98]:
# Fill the missing ages with the mean age.
# The {column: value} mapping restricts the fill to 'age'; a bare scalar
# would write the mean age into the NaNs of every column.
# The filled frame is only displayed here; it is not assigned back to `data`.
Mean = data['age'].mean()
data.fillna({'age': Mean})

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,3.4375,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,3.4375,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [99]:
# Drop the rows that contain missing values.
# The result is only displayed here; it is not assigned back to data1.

data1.dropna()

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
i,dog,7.0,2,no
j,dog,3.0,1,no


### **`Dataframe File Operations`**

In [None]:
# Save data1 to a CSV file named data1.csv

data1.to_csv('data1.csv')

In [None]:
# Read the data back from csv.
# index_col=0 restores the row labels that to_csv wrote as the first (unnamed)
# column; without it they come back as an extra "Unnamed: 0" data column.

read_data = pd.read_csv('data1.csv', index_col=0)

In [None]:
# Save data to xlsx.
# DataFrame has no to_xlsx method (the call raised AttributeError); Excel files
# are written with to_excel, and the target needs an .xlsx extension, not .csv.
# Writing .xlsx requires an Excel engine such as openpyxl to be installed.

data1.to_excel('data1.xlsx')