# Pandas Practice Tutorial
This notebook introduces the basics of pandas: creating objects, loading data, and sorting, filtering and summarising it.

- How to install pandas

In [181]:
# pip install pandas 
# pip install numpy 

- How to import Libraries

In [182]:
import pandas as pd
import numpy as np

- Object Creation

In [183]:
# np.nan marks a missing entry; the resulting Series is stored as float64.
s=pd.Series([1,2,5,6,np.nan,7,8])
s

0    1.0
1    2.0
2    5.0
3    6.0
4    NaN
5    7.0
6    8.0
dtype: float64

In [184]:
# Four consecutive daily timestamps starting at 2022-01-01.
date=pd.date_range("20220101",periods=4)
date

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04'], dtype='datetime64[ns]', freq='D')

In [185]:
# 4x4 frame of standard-normal samples with string row labels "1".."4".
# A seeded generator keeps the values identical on every Restart & Run All.
df=pd.DataFrame(
    np.random.default_rng(42).standard_normal((4,4)),
    index=list("1234"),
    columns=["A","B","C","D"],
)
df

Unnamed: 0,A,B,C,D
1,1.356854,-1.817602,0.661117,0.867663
2,2.664971,-0.242173,-0.763959,0.890603
3,0.78812,0.247153,0.306972,-0.600117
4,2.327342,-0.220029,0.294685,-0.091503


# Data Frame through Dictionary

In [186]:
# Build a frame from column data: each keyword becomes a column name and
# each list supplies that column's values, one entry per row.
df = pd.DataFrame(
    dict(
        Name=[
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        Age=[22, 35, 58],
        Sex=["male", "male", "female"],
    )
)
df

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female


In [187]:
# Data type of each column of the frame built above.
df.dtypes

Name    object
Age      int64
Sex     object
dtype: object

# Loading a Dummy Store Dataset
Load the data and perform some basic operations on it.

In [188]:
# Read the store data from Stores.csv (path relative to the working directory)
# and preview its first five rows.
store = pd.read_csv("Stores.csv")
store.head(5)

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820
2,3,1340,1609,720,54010
3,4,1451,1748,620,53730
4,5,1770,2111,450,46620


In [189]:
# Only the first two rows.
store.head(2)

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820


In [190]:
# Only the last two rows.
store.tail(2)

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
894,895,1299,1560,770,96610
895,896,1174,1429,1110,54340


In [191]:
# Row labels of the frame; here a RangeIndex running from 0 up to (not including) 896.
store.index

RangeIndex(start=0, stop=896, step=1)

In [192]:
# Values of the Series s as a NumPy array (the missing entry stays nan).
s.to_numpy()

array([ 1.,  2.,  5.,  6., nan,  7.,  8.])

In [193]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
store.describe()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
count,896.0,896.0,896.0,896.0,896.0
mean,448.5,1485.409598,1782.035714,786.350446,59351.305804
std,258.797218,250.237011,299.872053,265.389281,17190.741895
min,1.0,775.0,932.0,10.0,14920.0
25%,224.75,1316.75,1575.5,600.0,46530.0
50%,448.5,1477.0,1773.5,780.0,58605.0
75%,672.25,1653.5,1982.75,970.0,71872.5
max,896.0,2229.0,2667.0,1560.0,116320.0


# Transpose Data
Transposing swaps the axes: rows become columns and columns become rows.

In [194]:
# .T gives the transposed frame: the 5 columns become rows and the 896 row labels become columns.
store.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,886,887,888,889,890,891,892,893,894,895
Store ID,1,2,3,4,5,6,7,8,9,10,...,887,888,889,890,891,892,893,894,895,896
Store_Area,1659,1461,1340,1451,1770,1442,1542,1261,1090,1030,...,1655,1156,1451,1539,1549,1582,1387,1200,1299,1174
Items_Available,1961,1752,1609,1748,2111,1733,1858,1507,1321,1235,...,1986,1398,1734,1829,1851,1910,1663,1436,1560,1429
Daily_Customer_Count,530,210,720,620,450,760,1030,1020,680,1130,...,1150,140,670,650,1220,1080,850,1060,770,1110
Store_Sales,66490,39820,54010,53730,46620,45260,72240,37720,46310,44150,...,77430,92370,34880,46580,70620,66390,82080,76440,96610,54340


# Sorting Data

`sort_index` sorts by labels, not by values. With `axis=0` the rows are reordered by their index labels; `ascending=True` sorts in ascending order and `ascending=False` in descending order.\
**Note:** To reorder the columns by their names instead, use `axis=1`; the rest of the call stays the same. To sort by the values in a column, use `sort_values(by=...)`.

In [195]:
# Reorder the rows by index label, largest label first, and show the top five.
store.sort_index(axis="index", ascending=False).head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
895,896,1174,1429,1110,54340
894,895,1299,1560,770,96610
893,894,1200,1436,1060,76440
892,893,1387,1663,850,82080
891,892,1582,1910,1080,66390


In [196]:
# Reorder the columns by name in ascending order; the row order is untouched.
store.sort_index(axis="columns", ascending=True).head()

Unnamed: 0,Daily_Customer_Count,Items_Available,Store ID,Store_Area,Store_Sales
0,530,1961,1,1659,66490
1,210,1752,2,1461,39820
2,720,1609,3,1340,54010
3,620,1748,4,1451,53730
4,450,2111,5,1770,46620


In [197]:
# Order the rows by their Store_Area value, smallest first (ascending is the default).
store.sort_values("Store_Area").head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
158,159,775,932,1090,42530
865,866,780,951,790,25600
744,745,854,1018,660,77740
549,550,869,1050,850,52540
72,73,891,1073,630,67370


# Filtering Data


In [198]:
# Column wise filtering
# NOTE: the column label in this data ends with a space ("Store ID "), so the
# key must include it; "Store ID" without the space would not match.
store["Store ID "].head()

0    1
1    2
2    3
3    4
4    5
Name: Store ID , dtype: int64

In [199]:
# Row wise filtering
# A plain slice on a DataFrame selects rows; the end (4) is excluded, so rows 0-3 are returned.
store[0:4]

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820
2,3,1340,1609,720,54010
3,4,1451,1748,620,53730


# Loc and Iloc To Extract Data
- Loc
   - `loc` selects rows by their index labels and columns by their names.
      - Slices are inclusive: the end label is part of the result.

In [200]:
# A bare ":" on both axes selects every row and every column.
store.loc[:,:].head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820
2,3,1340,1609,720,54010
3,4,1451,1748,620,53730
4,5,1770,2111,450,46620


In [201]:
# The label slice 1:2 keeps both end labels (rows 1 and 2); the list picks two columns by name.
store.loc[1:2,["Store_Area","Store_Sales"]].head()

Unnamed: 0,Store_Area,Store_Sales
1,1461,39820
2,1340,54010


In [202]:
# Same pattern for row labels 3 through 4 (both included) and a different pair of columns.
store.loc[3:4,["Items_Available","Store_Sales"]]

Unnamed: 0,Items_Available,Store_Sales
3,1748,53730
4,2111,46620


- Iloc
    - `iloc` selects rows and columns by their integer positions.
        - The end of a slice is exclusive for both rows and columns (`0:3` selects positions 0, 1 and 2).

In [203]:
# Positional selection with ":" on both axes: every row and every column.
store.iloc[:,:].head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820
2,3,1340,1609,720,54010
3,4,1451,1748,620,53730
4,5,1770,2111,450,46620


In [204]:
# Row positions 0-1 and column positions 0-2; the end of each slice is excluded.
store.iloc[0:2,0:3]

Unnamed: 0,Store ID,Store_Area,Items_Available
0,1,1659,1961
1,2,1461,1752


# Boolean Operators

In [205]:
# Keep only the rows whose Store_Area is below 1000, then preview them.
store.loc[store["Store_Area"].lt(1000)].head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
41,42,965,1152,600,48140
72,73,891,1073,630,67370
158,159,775,932,1090,42530
165,166,955,1133,580,46140
203,204,992,1192,900,34180


In [206]:
# Element-wise mask over the whole frame: cells that fail the test become NaN
# instead of rows being dropped, so the surviving values are shown as floats.
store[store<10].head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1.0,,,,
1,2.0,,,,
2,3.0,,,,
3,4.0,,,,
4,5.0,,,,


# Copy Method
- `copy()` returns a new DataFrame with its own copy of the data, so changes made to the copy do not affect the original.

In [207]:
# copy() returns a new DataFrame object holding the same data as store.
store1=store.copy()
store1.head()

Unnamed: 0,Store ID,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
0,1,1659,1961,530,66490
1,2,1461,1752,210,39820
2,3,1340,1609,720,54010
3,4,1451,1748,620,53730
4,5,1770,2111,450,46620


# Pandas Case Study
# Kashti (Titanic) Data Set

- Import libraries

In [208]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

In [209]:
# Load the "titanic" example dataset through seaborn; it is kept under the name kashti here.
kashti=sns.load_dataset("titanic")
kashti.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


# Saving Data into .csv file

In [210]:
# The data is written as CSV, so give the file a matching .csv extension
# (the previous name "kashti.xlxs" looked like a misspelled Excel file).
kashti.to_csv("kashti.csv")


# Dropping Columns and Rows
Dropping rows

In [211]:
# Remove the rows labelled 2 and 3, then keep the first five remaining rows.
row_drop = kashti.drop(index=[2, 3]).head()
row_drop

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True


Dropping columns from existing data

In [212]:
# Return a frame without the "deck" column; the result is not assigned, so row_drop keeps it.
row_drop.drop(columns=["deck"])

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,Cherbourg,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,Southampton,no,True


# Basic Statistics

In [213]:
# Summary statistics for the numeric columns; "age" has a lower count (714 of 891) because of missing values.
kashti.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,38.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


In [214]:
# Average of every numeric (and boolean) column. numeric_only=True is needed
# because the frame also holds text and categorical columns: without it, older
# pandas emits a FutureWarning and pandas 2.x raises a TypeError.
kashti.mean(numeric_only=True)

  kashti.mean()


survived       0.383838
pclass         2.308642
age           29.699118
sibsp          0.523008
parch          0.381594
fare          32.204208
adult_male     0.602694
alone          0.602694
dtype: float64

In [215]:
# Number of rows for each distinct value of the "survived" column.
kashti.value_counts("survived")

survived
0    549
1    342
dtype: int64

In [216]:
# Per-sex averages of the numeric (and boolean) columns. numeric_only=True
# skips the text and categorical columns, which pandas 2.x refuses to average.
kashti.groupby(["sex"]).mean(numeric_only=True)

Unnamed: 0_level_0,survived,pclass,age,sibsp,parch,fare,adult_male,alone
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,0.742038,2.159236,27.915709,0.694268,0.649682,44.479818,0.0,0.401274
male,0.188908,2.389948,30.726645,0.429809,0.235702,25.523893,0.930676,0.712305


In [217]:
# Averages of the numeric (and boolean) columns for each sex/class pair.
# numeric_only=True skips the text and categorical columns, which pandas 2.x
# refuses to average. "class" is categorical, so observed=False is spelled out
# to keep every category in the result regardless of the pandas default.
kashti.groupby(["sex","class"], observed=False).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,pclass,age,sibsp,parch,fare,adult_male,alone
sex,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
female,First,0.968085,1.0,34.611765,0.553191,0.457447,106.125798,0.0,0.361702
female,Second,0.921053,2.0,28.722973,0.486842,0.605263,21.970121,0.0,0.421053
female,Third,0.5,3.0,21.75,0.895833,0.798611,16.11881,0.0,0.416667
male,First,0.368852,1.0,41.281386,0.311475,0.278689,67.226127,0.97541,0.614754
male,Second,0.157407,2.0,30.740707,0.342593,0.222222,19.741782,0.916667,0.666667
male,Third,0.135447,3.0,26.507589,0.498559,0.224784,12.661633,0.919308,0.760807


In [218]:
# Same sex/class averages, restricted to passengers younger than 18.
# numeric_only=True skips the text and categorical columns, which pandas 2.x
# refuses to average; observed=False keeps every "class" category in the
# result regardless of the pandas default.
kashti[kashti["age"]<18].groupby(["sex","class"], observed=False).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,pclass,age,sibsp,parch,fare,adult_male,alone
sex,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
female,First,0.875,1.0,14.125,0.5,0.875,104.083337,0.0,0.125
female,Second,1.0,2.0,8.333333,0.583333,1.083333,26.241667,0.0,0.166667
female,Third,0.542857,3.0,8.428571,1.571429,1.057143,18.727977,0.0,0.228571
male,First,1.0,1.0,8.23,0.5,2.0,116.0729,0.25,0.0
male,Second,0.818182,2.0,4.757273,0.727273,1.0,25.659473,0.181818,0.181818
male,Third,0.232558,3.0,9.963256,2.069767,1.0,22.752523,0.348837,0.232558
