# Guide to Pandas
The pandas Series data structure

In [1]:
import pandas as pd

In [2]:
# A Series built from a plain list of strings gets a default 0..n-1 index.
shop = [
    'bananas',
    'apples',
    'strawberry',
]
pd.Series(data=shop)

0       bananas
1        apples
2    strawberry
dtype: object

In [3]:
# The integers 1 through 8 as a list, then wrapped in a Series.
num = list(range(1, 9))
pd.Series(data=num)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64

In [4]:
# Add a missing entry without mutating `shop`, so re-running this cell does
# not keep growing the list defined in an earlier cell.
# For a Series of strings, None is kept as-is and the dtype stays object.
shop_with_missing = shop + [None]
pd.Series(shop_with_missing)

0       bananas
1        apples
2    strawberry
3          None
dtype: object

In [5]:
# Add a missing entry without mutating `num`, so re-running this cell does
# not keep growing the list defined in an earlier cell.
# For numeric data, None is converted to NaN and the integers become floats.
num_with_missing = num + [None]
pd.Series(num_with_missing)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
7    8.0
8    NaN
dtype: float64

In [6]:
# Device -> brand mapping; when a dict is passed to pd.Series its keys become
# the index labels and its values become the data.
tech = dict(
    laptop='Dell',
    Mobile='Apple',
    Tablet='Samsung',
    Speaker='Portronics',
)
t = pd.Series(data=tech)
t

laptop           Dell
Mobile          Apple
Tablet        Samsung
Speaker    Portronics
dtype: object

In [7]:
# Inspect the labels of `t`; for a Series built from a dict these are the dict keys.
t.index

Index(['laptop', 'Mobile', 'Tablet', 'Speaker'], dtype='object')

In [8]:
# Passing an explicit index alongside a dict keeps only the listed labels,
# in the order given.
wanted_labels = ['laptop', 'Mobile', 'Speaker']
s = pd.Series(data=tech, index=wanted_labels)
s

laptop           Dell
Mobile          Apple
Speaker    Portronics
dtype: object

### Querying through Series

In [9]:
# Recreate the device -> brand Series so the querying examples below start
# from a known state.
tech = dict([
    ('laptop', 'Dell'),
    ('Mobile', 'Apple'),
    ('Tablet', 'Samsung'),
    ('Speaker', 'Portronics'),
])
t = pd.Series(data=tech)
t

laptop           Dell
Mobile          Apple
Tablet        Samsung
Speaker    Portronics
dtype: object

In [10]:
# Positional lookup: position 2 is the third entry of the Series.
t.iloc[2]

'Samsung'

In [11]:
# Label-based lookup: fetch the value stored under the 'Speaker' label.
t.loc['Speaker']

'Portronics'

In [12]:
# `t` is labelled with strings, so an integer inside plain [] is not a label.
# Older pandas silently fell back to a positional lookup in that case; that
# fallback is deprecated, so request position 2 explicitly with .iloc.
t.iloc[2]

'Samsung'

In [13]:
# Plain [] with a string key is a label lookup here, giving the same value
# as t.loc['Speaker'].
t['Speaker']

'Portronics'

In [14]:
# Same idea as before, but keyed by integers (23, 24, 25) so the resulting
# Series has an integer index that does not start at 0.
tech = {
    code: brand
    for code, brand in enumerate(['Dell', 'Apple', 'Samsung'], start=23)
}
t = pd.Series(data=tech)

In [15]:
# The index labels are now the integers 23, 24, 25, so 23 inside [] is
# matched as a label, not as a position (the Series has only three entries).
t[23]

'Dell'

# The DataFrame Data Structure

In [70]:
import pandas as pd

# One Series per purchase, keyed by field name; each becomes a row of `df`.
purchase_1 = pd.Series({'Name': 'Prakhar', 'Item Purchased': 'IPAD', 'Cost': 43000})
purchase_2 = pd.Series({'Name': 'Shubham', 'Item Purchased': 'Monitor', 'Cost': 27000})
purchase_3 = pd.Series({'Name': 'Simran', 'Item Purchased': 'Sneakers', 'Cost': 15000})

# Row labels for the three purchases; 'Store 1' appears twice, so the index
# is not unique.
store_labels = ['Store 1', 'Store 1', 'Store 2']
df = pd.DataFrame(data=[purchase_1, purchase_2, purchase_3], index=store_labels)
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Prakhar,IPAD,43000
Store 1,Shubham,Monitor,27000
Store 2,Simran,Sneakers,15000


In [44]:
# A label that matches a single row yields that row as a Series.
df.loc['Store 2']

Name                Simran
Item Purchased    Sneakers
Cost                 15000
Name: Store 2, dtype: object

In [45]:
# 'Store 1' labels two rows, so the result is a DataFrame containing both.
df.loc['Store 1']

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Prakhar,IPAD,43000
Store 1,Shubham,Monitor,27000


In [46]:
# Confirm that selecting a label with a single matching row gives a Series.
type(df.loc['Store 2'])

pandas.core.series.Series

In [47]:
# Row label and column label together: the 'Cost' value of every 'Store 1' row.
df.loc['Store 1', 'Cost']

Store 1    43000
Store 1    27000
Name: Cost, dtype: int64

Taking the transpose of the table with `.T`

In [48]:
# Same row/column selection as the previous cell: 'Cost' for the 'Store 1' rows.
df.loc['Store 1', 'Cost']

Store 1    43000
Store 1    27000
Name: Cost, dtype: int64

In [49]:
# .T transposes the frame: the original columns become rows and the store
# labels become columns.
df.T

Unnamed: 0,Store 1,Store 1.1,Store 2
Name,Prakhar,Shubham,Simran
Item Purchased,IPAD,Monitor,Sneakers
Cost,43000,27000,15000


In [50]:
# After transposing, 'Cost' is a row label, so .loc['Cost'] returns the cost
# for every store. The values come back with dtype object rather than int64.
df.T.loc['Cost']

Store 1    43000
Store 1    27000
Store 2    15000
Name: Cost, dtype: object

In [51]:
# Chained indexing: first select the 'Store 1' rows, then take 'Cost' from
# that intermediate result.
# NOTE(review): fine for reading, but df.loc['Store 1', 'Cost'] does the same
# in one step and is the form to use when assigning values.
df.loc['Store 1']['Cost']

Store 1    43000
Store 1    27000
Name: Cost, dtype: int64

In [52]:
# ':' keeps every row; the list picks the 'Name' and 'Cost' columns.
df.loc[:,['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Prakhar,43000
Store 1,Shubham,27000
Store 2,Simran,15000


In [53]:
# drop() returns a new frame without the 'Store 1' rows; the result is not
# assigned here, so `df` is left as it was.
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Simran,Sneakers,15000


In [54]:
# Display df to confirm that the drop() call above did not modify it.
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Prakhar,IPAD,43000
Store 1,Shubham,Monitor,27000
Store 2,Simran,Sneakers,15000


In [55]:
# Work on an independent copy so `df` keeps all of its rows, then remove
# every row labelled 'Store 1' from that copy.
copy_df = df.copy().drop('Store 1')
copy_df

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Simran,Sneakers,15000


In [56]:
# `del` removes the 'Name' column from copy_df in place.
# NOTE(review): re-running this cell fails once the column is gone.
del copy_df['Name']
copy_df

Unnamed: 0,Item Purchased,Cost
Store 2,Sneakers,15000


In [71]:
# Assigning a scalar to a new column name creates that column and fills
# every row with the scalar (None here).
df['Location'] = None
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Prakhar,IPAD,43000,
Store 1,Shubham,Monitor,27000,
Store 2,Simran,Sneakers,15000,


# DataFrame Indexing and Loading

In [58]:
# Pull the 'Cost' column out of df as a Series.
costs = df['Cost']
costs

Store 1    43000
Store 1    27000
Store 2    15000
Name: Cost, dtype: int64

In [59]:
# Raise every cost by 200 on `df` itself. Doing `costs += 200` on the
# extracted Series only changes `df` when `costs` happens to be a view of
# the frame; with Copy-on-Write enabled it is not, and `df` would silently
# keep the old values. Writing the column back makes the update explicit.
df['Cost'] = df['Cost'] + 200
# Re-read the column so `costs` reflects the updated values.
costs = df['Cost']
costs

Store 1    43200
Store 1    27200
Store 2    15200
Name: Cost, dtype: int64

In [60]:
# Display df again: the recorded output shows the Cost column increased by 200.
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Prakhar,IPAD,43200,
Store 1,Shubham,Monitor,27200,
Store 2,Simran,Sneakers,15200,


In [61]:
# Element-wise comparison: produces a boolean Series with one True/False per row.
df['Cost']>30000

Store 1     True
Store 1    False
Store 2    False
Name: Cost, dtype: bool

The `.head()` function is used to preview the first five rows of your dataset.

In [62]:
# Preview the first rows of df (head() shows at most five by default; df has three).
df.head()

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Prakhar,IPAD,43200,
Store 1,Shubham,Monitor,27200,
Store 2,Simran,Sneakers,15200,


In [63]:
# Build one boolean mask per condition, then keep the rows where both hold.
is_under_30000 = df['Cost'] < 30000
is_sneakers = df['Item Purchased'] == 'Sneakers'
df[is_under_30000 & is_sneakers]

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 2,Simran,Sneakers,15200,


In [64]:
# count() reports the number of non-missing entries per column; 'Location'
# holds only None, hence its count of 0.
df.count()

Name              3
Item Purchased    3
Cost              3
Location          0
dtype: int64

In [65]:
# Non-missing count for the single 'Cost' column.
df['Cost'].count()

3

In [66]:
# Move the store labels out of the index into a regular column named 'index'
# and replace the index with 0, 1, 2, ...
# NOTE(review): this rebinds `df`, so re-running the cell resets the index again.
df = df.reset_index()
df

Unnamed: 0,index,Name,Item Purchased,Cost,Location
0,Store 1,Prakhar,IPAD,43200,
1,Store 1,Shubham,Monitor,27200,
2,Store 2,Simran,Sneakers,15200,


In [67]:
# Build a two-level index from the 'index' and 'Item Purchased' columns.
# The result is only displayed, not assigned, so df keeps its current index.
df.set_index(['index' , 'Item Purchased'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Cost,Location
index,Item Purchased,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Store 1,IPAD,Prakhar,43200,
Store 1,Monitor,Shubham,27200,
Store 2,Sneakers,Simran,15200,


In [69]:
# Restrict df to the two columns of interest; all rows are kept.
columns_to_keep = ['Name', 'Cost']
df = df.loc[:, columns_to_keep]
df

Unnamed: 0,Name,Cost
0,Prakhar,43200
1,Shubham,27200
2,Simran,15200
