# Basics of Pandas Dataframes

## Create a dataframe 

In [3]:
import pandas as pd
import numpy as np

# Build a small table as a NumPy array: the first row carries the column
# labels and the first column carries the row labels.
# NOTE: mixing strings and integers in a single np.array coerces every
# element to a string, so the numeric cells are stored as '1', '2', '3', '4'.
data = np.array([['', 'Col 1', 'Col 2'], ['Row 1', 1, 2], ['Row 2', 3, 4]])

# Convert to a DataFrame: slice the labels away from the value block and cast
# the values back to integers so the frame holds numbers rather than strings.
df = pd.DataFrame(
    data=data[1:, 1:].astype(int),
    index=data[1:, 0],
    columns=data[0, 1:],
)
df

Unnamed: 0,Col 1,Col 2
Row 1,1,2
Row 2,3,4


### Two-dimensional array as input

In [4]:
# Build a DataFrame straight from a 2-D NumPy array. No labels are supplied,
# so the printed frame shows rows and columns numbered from 0.
my_2darray = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(pd.DataFrame(data=my_2darray))

   0  1  2
0  1  2  3
1  4  5  6


### Dictionary as input

In [5]:
# Build a DataFrame from a dict: each key becomes a column label and each
# list supplies that column's values (note the values here are strings).
my_dict = {
    1: ['1', '3'],
    2: ['1', '2'],
    3: ['2', '4'],
}
print(pd.DataFrame(data=my_dict))

   1  2  3
0  1  1  2
1  3  2  4


### Series as input

In [6]:
# Take a Series as input to your DataFrame.
# Recent pandas releases keep the dict's insertion order when building a
# Series (older releases sorted by key), so the index is sorted explicitly
# here to get the alphabetical key order on any pandas version.
my_series = pd.Series({
    "United Kingdom": "London",
    "India": "New Delhi",
    "United States": "Washington",
    "Belgium": "Brussels",
}).sort_index()

# Wrapping the Series in a DataFrame keeps its index as the row labels.
df_series = pd.DataFrame(my_series)

In [7]:
# Inspect the row labels of the frame built from the Series.
df_series.index

Index(['Belgium', 'India', 'United Kingdom', 'United States'], dtype='object')

## Select an index or column

In [34]:
# Three purchase records, one Series per purchase, all sharing the same keys.
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})

purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})

purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

# Each Series becomes one row, labelled by store.
# The column order is pinned explicitly: depending on the pandas version the
# columns would otherwise come out either key-sorted or in insertion order,
# and the positional examples further down (e.g. df.iloc[0, 0]) rely on
# 'Cost' being the first column.
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 2', 'Store 3'],
    columns=['Cost', 'Item Purchased', 'Name'],
)
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 2,2.5,Kitty Litter,Kevyn
Store 3,5.0,Bird Seed,Vinod


### Individual cell

In [35]:
# Inspect the row labels (the store names passed as `index` above).
df.index

Index(['Store 1', 'Store 2', 'Store 3'], dtype='object')

In [36]:
# Position-based lookup with .iloc: first row, first column.
# Which column that is depends on the frame's column order.
df.iloc[0, 0]

22.5

In [37]:
# Two-step lookup: take the first row by position, then pick the 'Cost'
# entry from that row by label.
df.iloc[0]['Cost']

22.5

In [38]:
# Label-based lookup with .loc: row label 'Store 1', column label 'Cost'.
df.loc['Store 1', 'Cost']

22.5

In [39]:
# .at: label-based access to a single cell (row 'Store 1', column 'Cost').
df.at['Store 1', 'Cost']

22.5

### Select a row

In [40]:
# Select the first row by position; the result is labelled by column name.
df.iloc[0]

Cost                  22.5
Item Purchased    Dog Food
Name                 Chris
Name: Store 1, dtype: object

In [41]:
# Select the same row by its index label instead of its position.
df.loc['Store 1']

Cost                  22.5
Item Purchased    Dog Food
Name                 Chris
Name: Store 1, dtype: object

### Select a column

In [42]:
# Select every row (:) of the column at position 1, i.e. the second column.
df.iloc[:, 1]

Store 1        Dog Food
Store 2    Kitty Litter
Store 3       Bird Seed
Name: Item Purchased, dtype: object

In [43]:
# Select every row (:) of the column labelled 'Item Purchased'.
df.loc[:, 'Item Purchased']

Store 1        Dog Food
Store 2    Kitty Litter
Store 3       Bird Seed
Name: Item Purchased, dtype: object

## Add an index, row or column

### Add an Index

In [44]:
# Plain-text view of the frame before its index is changed.
print(df)

         Cost Item Purchased   Name
Store 1  22.5       Dog Food  Chris
Store 2   2.5   Kitty Litter  Kevyn
Store 3   5.0      Bird Seed  Vinod


In [45]:
# Use the 'Cost' column as the row index. The result is only displayed here,
# not assigned, so `df` itself keeps its store-label index.
df.set_index('Cost')

Unnamed: 0_level_0,Item Purchased,Name
Cost,Unnamed: 1_level_1,Unnamed: 2_level_1
22.5,Dog Food,Chris
2.5,Kitty Litter,Kevyn
5.0,Bird Seed,Vinod


In [46]:
# Show df again to confirm that the set_index call above did not modify it.
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 2,2.5,Kitty Litter,Kevyn
Store 3,5.0,Bird Seed,Vinod


### Make the index part of the dataframe 

In [47]:
# Keep the index and also copy its labels into a regular column named 'New'.
# NOTE: `temp_df = df` does not copy the frame; both names refer to the same
# object, so the 'New' column is added to `df` as well.
temp_df = df
temp_df['New'] = df.index
temp_df

Unnamed: 0,Cost,Item Purchased,Name,New
Store 1,22.5,Dog Food,Chris,Store 1
Store 2,2.5,Kitty Litter,Kevyn,Store 2
Store 3,5.0,Bird Seed,Vinod,Store 3


In [55]:
# Same idea, but the new column is labelled with the integer 4 instead of a
# string name.
# NOTE: `temp_df` is again just another name for `df`, so the column is added
# to `df` too.
temp_df = df
temp_df.loc[:, 4] = df.index
temp_df

Unnamed: 0,Cost,Item Purchased,Name,New,4
Store 1,22.5,Dog Food,Chris,Store 1,Store 1
Store 2,2.5,Kitty Litter,Kevyn,Store 2,Store 2
Store 3,5.0,Bird Seed,Vinod,Store 3,Store 3


### Reset the index

In [60]:
# Replace the store-label index with a default 0..n-1 integer index and
# discard the old labels (drop=True) instead of keeping them as a column.
# `level` is omitted on purpose: the frame has a single-level index, and
# asking for level 1 raises IndexError on current pandas versions.
temp_df.reset_index(drop=True)

Unnamed: 0,Cost,Item Purchased,Name,New,4
0,22.5,Dog Food,Chris,Store 1,Store 1
1,2.5,Kitty Litter,Kevyn,Store 2,Store 2
2,5.0,Bird Seed,Vinod,Store 3,Store 3
