# Introduction to Pandas

In [1]:
import pandas as pd
import numpy as np


### Creating Series 

In [2]:

# Series from a plain Python list; pandas supplies the default 0..n-1 index.
s = pd.Series([10, 20, 30, 40])

# Series from a dict; the dict keys become the index labels.
s2 = pd.Series({"Name": "Naveed", "Age": 18})

# Series from a NumPy array of 5 integers drawn from the half-open range [1, 100).
# A seeded Generator is used instead of the unseeded global np.random state so
# that re-running the notebook from a fresh kernel yields the same values.
arr = np.random.default_rng(seed=42).integers(1, 100, size=5)
s3 = pd.Series(arr)


### Creating DataFrame

In [3]:
# DataFrame from a list of dicts: one dict per row, keys become the column names.
df = pd.DataFrame([
    {"Name": "Naveed", "Age": 18},
    {"Name": "Dogesh badmosh", "Age": 17},
])

# DataFrame from a 3x3 NumPy array of floats in [0, 1) with explicit column labels.
# A seeded Generator is used instead of the unseeded global np.random state so
# that re-running the notebook from a fresh kernel yields the same values.
df2 = pd.DataFrame(
    np.random.default_rng(seed=42).random((3, 3)),
    columns=["A", "B", "C"],
)

# DataFrame from a dict of lists: one key per column, list items fill the rows.
df3 = pd.DataFrame({
    "Name": ["Ali", "Fatima"],
    "Age": [20, 21],
})


### Viewing Data

In [4]:
# Only the value of the final expression is rendered as the cell result;
# info() is visible as well because it prints its summary.
df.head()      # leading rows (n defaults to 5)
df.tail()      # trailing rows (n defaults to 5)
df.shape       # (n_rows, n_columns) tuple
df.info()      # column dtypes, non-null counts and memory usage
df.describe()  # descriptive statistics for the numeric columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    2 non-null      object
 1   Age     2 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 164.0+ bytes


Unnamed: 0,Age
count,2.0
mean,17.5
std,0.707107
min,17.0
25%,17.25
50%,17.5
75%,17.75
max,18.0


### Selecting Data

In [5]:
# Each line is a stand-alone selection example; only the last one is rendered.
df['Name']                # one column -> Series
df[['Name', 'Age']]       # list of columns -> DataFrame
df.iloc[0]                # first row, by integer position
df.iat[0, 1]              # single value at row 0, column 1
df.loc[df['Age'].ge(18)]  # boolean mask: keep rows where Age >= 18


Unnamed: 0,Name,Age
0,Naveed,18


### Sorting Data

In [6]:
# Every call returns a new, sorted frame; df itself is left untouched.
df.sort_values(by='Age')                   # smallest Age first (default order)
df.sort_values(by='Age', ascending=False)  # largest Age first
df.sort_index()                            # order rows by their index labels


Unnamed: 0,Name,Age
0,Naveed,18
1,Dogesh badmosh,17


### Adding / Updating / Deleting Columns

In [11]:
# NOTE: this cell mutates df in place, so it is not idempotent --
# every additional run adds another 1 to each Age value.
df['Grade'] = [85, 90]                  # create a column from a list (one value per row)
df['Age'] = df['Age'].add(1)            # overwrite a column with a computed result
df.drop(columns='Grade', inplace=True)  # remove the column from df itself


### Handling Missing Data

In [12]:
# None of these calls modify df; each returns a new object.
df.isna()           # boolean frame, True where a value is missing
df.dropna(axis=0)   # drop every row that contains a missing value
df.fillna(value=0)  # substitute 0 for each missing value


Unnamed: 0,Name,Age
0,Naveed,20
1,Dogesh badmosh,19


### Aggregation 

In [13]:
# Scalar reductions over the Age column; only the last value is rendered.
df['Age'].agg('mean')   # arithmetic mean
df['Age'].agg('sum')    # total
df['Age'].agg('max')    # largest value
df['Age'].agg('min')    # smallest value
df['Age'].agg('count')  # number of non-missing entries


np.int64(2)

### Merging & Concatenating

In [15]:
# Two small frames that share a 'key' column; only key 'B' occurs in both.
# NOTE: this rebinds df2, which an earlier cell bound to a different frame.
df1 = pd.DataFrame({'key': list('AB'), 'val1': [1, 2]})
df2 = pd.DataFrame({'key': list('BC'), 'val2': [3, 4]})

df1.merge(df2, on='key', how='inner')  # inner join: keep keys present in both frames
pd.concat([df1, df2])                  # stack rows (axis=0 is the default); index labels repeat


Unnamed: 0,key,val1,val2
0,A,1.0,
1,B,2.0,
0,B,,3.0
1,C,,4.0


### Apply / Lambda / Map

In [16]:
# apply + lambda: call an anonymous function on each Age value.
df['Double_Age'] = df['Age'].apply(lambda age: age * 2)
# map + existing callable: upper-case each Name; this overwrites the column.
df['Name'] = df['Name'].map(str.upper)


### Sampling & Duplicates

In [None]:
# All three calls return new frames and leave df unchanged.
df.sample(n=2)                    # 2 rows picked at random
df.drop_duplicates(keep='first')  # drop repeated rows, keeping the first occurrence
df.reset_index(drop=True)         # fresh 0..n-1 index, old index discarded


Unnamed: 0,Name,Age,Double_Age
0,NAVEED,18,36
1,AYESHA,18,36


### Save & Load Files

In [None]:
# CSV round trip: write df without its index, then read the file back.
df.to_csv(path_or_buf="data.csv", index=False)
df_loaded = pd.read_csv(filepath_or_buffer="data.csv")

# Excel round trip. df_loaded is rebound here, so after this cell it holds
# the frame read from the Excel file, not the one read from the CSV file.
df.to_excel(excel_writer="data.xlsx", index=False)
df_loaded = pd.read_excel(io="data.xlsx")
