## Usecase of pandas
- Data Cleaning
- Data Analysis
- Data Transformation
- Data Visualization (Basic Level)
- Data Aggregation
- Data Handling
- Data Filtering & Selection
- Time Series analysis

In [2]:
# Series
# A series is a one dimentional labelled array capable of holding any data type. the axis are collectively called the index

In [3]:
import numpy as np
import pandas as pd

In [4]:
labels = ['a','b','c']
my_lst = [10,20,30]
arr = np.array([10,20,30])
dic = {1:10,2:20,3:30}

In [5]:
pd.Series(arr)

0    10
1    20
2    30
dtype: int64

In [6]:
pd.Series(dic)

1    10
2    20
3    30
dtype: int64

In [7]:
pd.Series(my_lst,index=labels)

a    10
b    20
c    30
dtype: int64

# DataFrames
- Creating dataframe
- Selection and indexing of cols 
- Creating new col 
- Removing cols
- selecting rows
- selecting subsets of rows and cols
- conditional selection

In [8]:
data = {
    "Name":['Shivam','Shahi','Rohan','Raghav','Ram'],
    "Age": [20,23,34,56,78],
    "City":["New York","Paris","Berlin","London","India"],
    "Salary":[65000,70000,62000,85000,np.nan]
}

df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,Name,Age,City,Salary
0,Shivam,20,New York,65000.0
1,Shahi,23,Paris,70000.0
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


In [10]:
df['Age'].astype(float)

0    20.0
1    23.0
2    34.0
3    56.0
4    78.0
Name: Age, dtype: float64

In [12]:
# selection of cols

df[['Age','Name']]

Unnamed: 0,Age,Name
0,20,Shivam
1,23,Shahi
2,34,Rohan
3,56,Raghav
4,78,Ram


In [13]:
df['Name']

0    Shivam
1     Shahi
2     Rohan
3    Raghav
4       Ram
Name: Name, dtype: object

In [15]:
# creating new cols

df['Designation'] = ['Data Scientist', "Devops","AI Engineer","Data Analyst","Doctor"]

In [16]:
df

Unnamed: 0,Name,Age,City,Salary,Designation
0,Shivam,20,New York,65000.0,Data Scientist
1,Shahi,23,Paris,70000.0,Devops
2,Rohan,34,Berlin,62000.0,AI Engineer
3,Raghav,56,London,85000.0,Data Analyst
4,Ram,78,India,,Doctor


In [19]:
# removing col

df = df.drop('Designation',axis=1)

In [20]:
df

Unnamed: 0,Name,Age,City,Salary
0,Shivam,20,New York,65000.0
1,Shahi,23,Paris,70000.0
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


In [21]:
# selection row
df

Unnamed: 0,Name,Age,City,Salary
0,Shivam,20,New York,65000.0
1,Shahi,23,Paris,70000.0
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


In [23]:
df.loc[4]

Name        Ram
Age          78
City      India
Salary      NaN
Name: 4, dtype: object

In [24]:
df.iloc[4]

Name        Ram
Age          78
City      India
Salary      NaN
Name: 4, dtype: object

In [26]:
# selecting subjects of rows and cols

df.loc[0][['City','Name']]

City    New York
Name      Shivam
Name: 0, dtype: object

In [27]:
df

Unnamed: 0,Name,Age,City,Salary
0,Shivam,20,New York,65000.0
1,Shahi,23,Paris,70000.0
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


In [29]:
df.loc[[2,3]][["Name",'Age']]

Unnamed: 0,Name,Age
2,Rohan,34
3,Raghav,56


In [30]:
# conditional selection

df2 = df.copy()

In [34]:
df2.fillna(50000,inplace=True)

Unnamed: 0,Name,Age,City,Salary
0,Shivam,20,New York,65000.0
1,Shahi,23,Paris,70000.0
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


In [36]:
# people whose age is above 30

df[df['Age']>30]

Unnamed: 0,Name,Age,City,Salary
2,Rohan,34,Berlin,62000.0
3,Raghav,56,London,85000.0
4,Ram,78,India,


#### MISSING DATA

In [38]:
data = {
    'A': [1, 2, np.nan, 4, 5],
    'B': [10, np.nan, 30, 40, 50],
    'C': [100, 200, 300, np.nan, 500]
}

df3 = pd.DataFrame(data)

In [40]:
df3

Unnamed: 0,A,B,C
0,1.0,10.0,100.0
1,2.0,,200.0
2,,30.0,300.0
3,4.0,40.0,
4,5.0,50.0,500.0


In [43]:
df3.isna().sum()

A    1
B    1
C    1
dtype: int64