### Basics of Pandas - Series, DataFrame

In [1]:
#Import pandas and numpy libraries
import pandas as pd
import numpy as np

## Series

In [2]:
#Create a series of five strings with an explicit integer index 0-4
#NOTE(review): 'ornage' looks like a typo for 'orange' - confirm before changing the data
s = pd.Series(data = ['apple','banana','cherry','grapes','ornage'], index = [0,1,2,3,4])

In [3]:
s

0     apple
1    banana
2    cherry
3    grapes
4    ornage
dtype: object

In [4]:
#display length of series
len(s)

5

In [5]:
#Create new series with random numbers
#A fixed seed keeps the values identical on every Restart & Run All.
#A private RandomState is used so the global NumPy random state is left untouched.
s = pd.Series(data = np.random.RandomState(42).randn(5), index = range(1,6), name = 'numbers')

In [6]:
s

1   -0.071852
2    0.951779
3    1.257175
4   -0.846549
5    1.137608
Name: numbers, dtype: float64

In [7]:
#display maximum value in series
s.max()

1.2571745879789036

In [8]:
#display minimum value in series
s.min()

-0.8465488148768718

In [55]:
#display mean value of series
s.mean()

0.48563208232376986

In [10]:
#Square each element of the series
s**2

1    0.005163
2    0.905883
3    1.580488
4    0.716645
5    1.294152
Name: numbers, dtype: float64

In [11]:
#square root of the squared series elements, i.e. the absolute value of each element
np.sqrt(s**2)

1    0.071852
2    0.951779
3    1.257175
4    0.846549
5    1.137608
Name: numbers, dtype: float64

## DataFrame

In [12]:
#Build a dataframe with 2 columns (A and B) from one (A, B) record per row, labelled 1 to 6
df = pd.DataFrame(
    [
        ('apple', 'Fruit'),
        ('carrot', 'Veggie'),
        ('orange', 'Fruit'),
        ('cherry', 'Fruit'),
        ('grape', 'Fruit'),
        ('onion', 'Veggie'),
    ],
    columns=['A', 'B'],
    index=[1, 2, 3, 4, 5, 6],
)

In [13]:
#display dataframe
df

Unnamed: 0,A,B
1,apple,Fruit
2,carrot,Veggie
3,orange,Fruit
4,cherry,Fruit
5,grape,Fruit
6,onion,Veggie


In [14]:
#Rename the columns of the data frame
df.rename(columns ={'A':'Items','B':'Type'})

Unnamed: 0,Items,Type
1,apple,Fruit
2,carrot,Veggie
3,orange,Fruit
4,cherry,Fruit
5,grape,Fruit
6,onion,Veggie


In [15]:
#The change was not saved to the dataframe: rename returned a new copy, so df still has columns A and B
df

Unnamed: 0,A,B
1,apple,Fruit
2,carrot,Veggie
3,orange,Fruit
4,cherry,Fruit
5,grape,Fruit
6,onion,Veggie


In [16]:
#Pass inplace=True so that rename modifies df itself instead of returning a renamed copy
#(equivalent to: df = df.rename(columns={'A':'Items','B':'Type'}))
df.rename(columns ={'A':'Items','B':'Type'},inplace = True)

In [17]:
#Now the changes are saved to the dataframe
df

Unnamed: 0,Items,Type
1,apple,Fruit
2,carrot,Veggie
3,orange,Fruit
4,cherry,Fruit
5,grape,Fruit
6,onion,Veggie


In [56]:
#select the first row by integer position 0 (.iloc is position-based, not label-based)
#NOTE(review): the execution count of this cell is out of sequence, so it was re-run later;
#on a fresh top-to-bottom run df has no Price column at this point.
df.iloc[0,:]

Items    apple
Type     Fruit
Price      2.2
Name: 1, dtype: object

In [19]:
#.loc selects data by label, and the index of this dataframe (1 to 6) has no label 0,
#so the line below would raise a KeyError. Use .iloc for position-based selection instead.
#df.loc[0,:]

In [20]:
df

Unnamed: 0,Items,Type
1,apple,Fruit
2,carrot,Veggie
3,orange,Fruit
4,cherry,Fruit
5,grape,Fruit
6,onion,Veggie


In [21]:
#select data with row label 1
df.loc[1,:]

Items    apple
Type     Fruit
Name: 1, dtype: object

In [22]:
#select data from row with label 1 and columns Type and Items
df.loc[1,['Type','Items']]

Type     Fruit
Items    apple
Name: 1, dtype: object

In [23]:
#Select Items column (the double brackets return a one-column DataFrame rather than a Series)
df[['Items']]

Unnamed: 0,Items
1,apple
2,carrot
3,orange
4,cherry
5,grape
6,onion


In [24]:
#display the index of dataframe
df.index

Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

In [25]:
#Create a series Price from a {row label: price} mapping covering labels 1 to 5
Price = pd.Series({1: 2.2, 2: 1.5, 3: 4, 4: 3.6, 5: 0.5}, name = 'Price')

In [26]:
Price

1    2.2
2    1.5
3    4.0
4    3.6
5    0.5
Name: Price, dtype: float64

In [27]:
#Add the Price to dataframe
df['Price'] = Price

In [28]:
#Price is added to the dataframe. Since the value in Price series corresponding to index 6 is missing, 
#dataframe has null value for price in index 6.
df

Unnamed: 0,Items,Type,Price
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,onion,Veggie,


In [29]:
#Check null values in dataframe. Create Summary table for null values
df.isnull().sum()

Items    0
Type     0
Price    1
dtype: int64

In [30]:
#We can either drop the row containing null value
df.dropna()

Unnamed: 0,Items,Type,Price
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5


In [31]:
#We can fill the null value based on the data from other rows. Let us fill the mean of price in place of null value.
#The dict restricts the fill to the Price column, so a null in a text column is never replaced by a number.
#This returns a new dataframe and leaves df unchanged.
df.fillna({'Price': df['Price'].mean()})

Unnamed: 0,Items,Type,Price
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,onion,Veggie,2.36


In [32]:
#Save the fill to the dataframe. Only the Price column is filled, so a missing
#value in a text column can never be replaced by the mean price.
df['Price'] = df['Price'].fillna(df['Price'].mean())

In [33]:
#resetting the index. Now the index is the default value starting from 0
#and the old index labels are kept as a new column named 'index'
df.reset_index(inplace = True)

In [34]:
df

Unnamed: 0,index,Items,Type,Price
0,1,apple,Fruit,2.2
1,2,carrot,Veggie,1.5
2,3,orange,Fruit,4.0
3,4,cherry,Fruit,3.6
4,5,grape,Fruit,0.5
5,6,onion,Veggie,2.36


In [35]:
df.rename(columns={'index':'SKU'},inplace = True)

In [36]:
df

Unnamed: 0,SKU,Items,Type,Price
0,1,apple,Fruit,2.2
1,2,carrot,Veggie,1.5
2,3,orange,Fruit,4.0
3,4,cherry,Fruit,3.6
4,5,grape,Fruit,0.5
5,6,onion,Veggie,2.36


In [37]:
#Setting SKU column as index (returns a new dataframe; df itself keeps SKU as a regular column)
df.set_index('SKU')

Unnamed: 0_level_0,Items,Type,Price
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,onion,Veggie,2.36


In [38]:
df

Unnamed: 0,SKU,Items,Type,Price
0,1,apple,Fruit,2.2
1,2,carrot,Veggie,1.5
2,3,orange,Fruit,4.0
3,4,cherry,Fruit,3.6
4,5,grape,Fruit,0.5
5,6,onion,Veggie,2.36


In [39]:
df.set_index('SKU',inplace = True)

In [40]:
df

Unnamed: 0_level_0,Items,Type,Price
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,onion,Veggie,2.36


In [41]:
#Replace onion with broccoli (returns a new Series; the dataframe itself is not modified)
df.Items.replace({'onion':'broccoli'})

SKU
1       apple
2      carrot
3      orange
4      cherry
5       grape
6    broccoli
Name: Items, dtype: object

In [42]:
df

Unnamed: 0_level_0,Items,Type,Price
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,onion,Veggie,2.36


In [43]:
#Replace onion with broccoli and save the change to the dataframe.
#The result is assigned back to the column instead of calling replace(..., inplace=True)
#on df.Items: an in-place update of a selected column does not reliably propagate to the
#parent dataframe (it is deprecated and has no effect under pandas Copy-on-Write).
df['Items'] = df['Items'].replace({'onion':'broccoli'})

In [44]:
df

Unnamed: 0_level_0,Items,Type,Price
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,broccoli,Veggie,2.36


In [47]:
#Update broccoli price to 3 dollars
df.loc[df.Items == 'broccoli','Price'] = 3

In [48]:
df

Unnamed: 0_level_0,Items,Type,Price
SKU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,broccoli,Veggie,3.0


In [49]:
#Adding a new entry: a one-row dataframe labelled 7, built from a column-name -> values mapping
row = pd.DataFrame({'Items': ['potato'], 'Type': ['Veggie'], 'Price': [0.9]}, index = [7])

In [50]:
row

Unnamed: 0,Items,Type,Price
7,potato,Veggie,0.9


In [51]:
#Append the new row to the dataframe.
#DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the supported equivalent.
df = pd.concat([df, row])

In [52]:
df

Unnamed: 0,Items,Type,Price
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
5,grape,Fruit,0.5
6,broccoli,Veggie,3.0
7,potato,Veggie,0.9


In [53]:
#Sort values by Price column from greater to lower values
df.sort_values('Price',ascending=False)

Unnamed: 0,Items,Type,Price
3,orange,Fruit,4.0
4,cherry,Fruit,3.6
6,broccoli,Veggie,3.0
1,apple,Fruit,2.2
2,carrot,Veggie,1.5
7,potato,Veggie,0.9
5,grape,Fruit,0.5


In [54]:
#Sort values by Price column from lower to greater values
df.sort_values('Price',ascending=True)

Unnamed: 0,Items,Type,Price
5,grape,Fruit,0.5
7,potato,Veggie,0.9
2,carrot,Veggie,1.5
1,apple,Fruit,2.2
6,broccoli,Veggie,3.0
4,cherry,Fruit,3.6
3,orange,Fruit,4.0
