![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)
<hr style="margin-bottom: 40px;">

<img src="https://user-images.githubusercontent.com/7065401/75165824-badf4680-5701-11ea-9c5b-5475b0a33abf.png"
    style="width:300px; float: right; margin: 0 40px 40px 40px;"></img>

# Pandas - Series

![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## Hands on! 

In [2]:
import numpy as np
import pandas as pd

In [4]:
g7_pop = pd.Series([35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523])

In [5]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [6]:
g7_pop.name = 'G7 Population in millions'

In [7]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [8]:
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [9]:
type(g7_pop.values)

numpy.ndarray

In [10]:
g7_pop[1]

63.951

In [11]:
g7_pop[0]

35.467

In [12]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [13]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [14]:
# Replace the default 0..6 integer index with country-name labels.
# NOTE(review): these labels differ from the country keys used for the same
# seven values in the dict-based Series built later in this notebook
# (Canada, France, Germany, Italy, Japan, United Kingdom, United States);
# confirm which labelling is intended for a "G7" series.
g7_pop.index = [
    'Canada',
    'United States',
    'India',
    'France',
    'Korea',
    'Germany',
    'Italy'    
]

In [15]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [16]:
pd.Series({
    'Canada': 35.467,
    'France': 63.951,
    'Germany': 80.94,
    'Italy': 60.665,
    'Japan': 127.061,
    'United Kingdom': 64.511,
    'United States': 318.523
}, name='G7 Population in millions')

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [17]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [21]:
# Build a new Series aligned to the given labels; labels that do not exist
# in g7_pop (here 'Spain') are filled with NaN.
g7_pop.reindex(['Canada', 'Korea', 'France', 'Italy', 'Spain'])

Canada     35.467
Korea     127.061
France     60.665
Italy     318.523
Spain         NaN
Name: G7 Population in millions, dtype: float64

In [24]:
g7_pop['Canada']

35.467

In [26]:
g7_pop['Canada']

35.467

In [27]:
g7_pop.iloc[0]

35.467

In [29]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [28]:
g7_pop.iloc[-1]

318.523

In [30]:
g7_pop.iloc[1]

63.951

In [32]:
g7_pop[['France','Korea']] # passing a list of labels selects several values at once and returns a Series

France     60.665
Korea     127.061
Name: G7 Population in millions, dtype: float64

### _Passing a list of index labels selects multiple values at once_

In [36]:
# Select the 1st and 4th elements by position. Use .iloc explicitly: on a
# label-indexed Series, integer keys passed to plain [] are deprecated as
# positional access and are treated as labels in newer pandas versions.
g7_pop.iloc[[0, 3]]

Canada    35.467
France    60.665
Name: G7 Population in millions, dtype: float64

### In pandas, label-based slicing includes the upper limit (unlike positional slicing, which excludes it)

In [35]:
g7_pop['Canada': 'France']

Canada           35.467
United States    63.951
India            80.940
France           60.665
Name: G7 Population in millions, dtype: float64

## <font color='Maroon'> Conditional Selection (Boolean Series) </font>



In [37]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [41]:
g7_pop > 80

Canada           False
United States    False
India             True
France           False
Korea             True
Germany          False
Italy             True
Name: G7 Population in millions, dtype: bool

In [43]:
g7_pop[g7_pop > 80] # the inner boolean Series acts as a mask: only the entries where it is True are kept

India     80.940
Korea    127.061
Italy    318.523
Name: G7 Population in millions, dtype: float64

In [44]:
g7_pop.mean()

107.30257142857144

In [47]:
g7_pop.max()

318.523

In [48]:
g7_pop.min()

35.467

In [49]:
g7_pop.std()

97.24996987121581

In [50]:
g7_pop[g7_pop > g7_pop.mean()]

Korea    127.061
Italy    318.523
Name: G7 Population in millions, dtype: float64

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Operations and methods

In [54]:
g7_pop * 100000

Canada            3546700.0
United States     6395100.0
India             8094000.0
France            6066500.0
Korea            12706100.0
Germany           6451100.0
Italy            31852300.0
Name: G7 Population in millions, dtype: float64

In [55]:
g7_pop.mean()

107.30257142857144

In [62]:
g7_pop[~(g7_pop > 80)]

Canada           35.467
United States    63.951
France           60.665
Germany          64.511
Name: G7 Population in millions, dtype: float64

In [65]:
np.log10(g7_pop)

Canada           1.549824
United States    1.805847
India            1.908163
France           1.782938
Korea            2.104012
Germany          1.809634
Italy            2.503141
Name: G7 Population in millions, dtype: float64

In [66]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [72]:
g7_pop['Canada':'India'].mean()

60.11933333333334

In [73]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [75]:
g7_pop[(g7_pop > 60) & (g7_pop < 100)]

United States    63.951
India            80.940
France           60.665
Germany          64.511
Name: G7 Population in millions, dtype: float64

In [80]:
g7_pop[(g7_pop > 80) | (g7_pop < 40)] # | is OR operator

Canada     35.467
India      80.940
Korea     127.061
Italy     318.523
Name: G7 Population in millions, dtype: float64

In [81]:
g7_pop

Canada            35.467
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [84]:
g7_pop['Canada'] = 23.5 # assigning the value to series element

In [85]:
g7_pop

Canada            23.500
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            318.523
Name: G7 Population in millions, dtype: float64

In [89]:
g7_pop.iloc[1]

63.951

In [90]:
g7_pop.iloc[0]

23.5

In [91]:
g7_pop.iloc[-1] = 200.445

In [92]:
g7_pop

Canada            23.500
United States     63.951
India             80.940
France            60.665
Korea            127.061
Germany           64.511
Italy            200.445
Name: G7 Population in millions, dtype: float64

In [93]:
# Overwrite every value above 100 with 99.99 using a boolean mask.
g7_pop.loc[g7_pop > 100] = 99.99

In [94]:
g7_pop

Canada           23.500
United States    63.951
India            80.940
France           60.665
Korea            99.990
Germany          64.511
Italy            99.990
Name: G7 Population in millions, dtype: float64

In [95]:
g7_pop['Korea'] = 223.445

In [98]:
%time g7_pop

Wall time: 1.01 ms


Canada            23.500
United States     63.951
India             80.940
France            60.665
Korea            223.445
Germany           64.511
Italy             99.990
Name: G7 Population in millions, dtype: float64

## <font color = 'red'>Dataframes in Pandas </font>

> ### _A DataFrame is like a table: rows share an index and each column is a Series_

In [108]:
# Build a small country-level table from a dict mapping column name -> values.
# Every list has five entries (one per row); `columns=` pins the column order.
df = pd.DataFrame(
    {
        'Population': [23.445, 44.445, 33.656, 66.565, 67.445],
        'GDP': [123456, 233443, 234234, 324324, 766767],
        'Surface Area': [342343, 23423424, 657567, 565464, 879878],
        'HDI': [0.556, 0.784, 0.556, 0.895, 0.578],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia'],
    },
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

#### _The `columns` argument is passed to keep the columns in the listed order._

In [109]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,23.445,123456,342343,0.556,America
1,44.445,233443,23423424,0.784,Europe
2,33.656,234234,657567,0.556,Europe
3,66.565,324324,565464,0.895,Europe
4,67.445,766767,879878,0.578,Asia


In [110]:
df.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'China'
]

In [111]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe
China,67.445,766767,879878,0.578,Asia


In [112]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [114]:
 df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'China'], dtype='object')

In [116]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Canada to China
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    5 non-null      float64
 1   GDP           5 non-null      int64  
 2   Surface Area  5 non-null      int64  
 3   HDI           5 non-null      float64
 4   Continent     5 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 240.0+ bytes


In [117]:
 df.shape

(5, 5)

In [120]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe
China,67.445,766767,879878,0.578,Asia


In [119]:
df.describe() # summary statistics; the output here covers only the numeric columns

Unnamed: 0,Population,GDP,Surface Area,HDI
count,5.0,5.0,5.0,5.0
mean,47.1112,336444.8,5173735.0,0.6738
std,19.622397,250870.861685,10203710.0,0.156529
min,23.445,123456.0,342343.0,0.556
25%,33.656,233443.0,565464.0,0.556
50%,44.445,234234.0,657567.0,0.578
75%,66.565,324324.0,879878.0,0.784
max,67.445,766767.0,23423420.0,0.895


In [125]:
df.dtypes # data types of fields

Population      float64
GDP               int64
Surface Area      int64
HDI             float64
Continent        object
dtype: object

In [128]:
df.size

25

In [130]:
df.dtypes.value_counts()

int64      2
float64    2
object     1
dtype: int64

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Indexing, Slicing and Selection

In [131]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe
China,67.445,766767,879878,0.578,Asia


In [149]:
df.loc['Canada'] # .loc selects the row labelled 'Canada' and returns it as a Series indexed by column name (it does not transpose the DataFrame)

Population       23.445
GDP              123456
Surface Area     342343
HDI               0.556
Continent       America
Name: Canada, dtype: object

In [144]:
df.loc['Italy'] #horizontal data filter

Population      66.565
GDP             324324
Surface Area    565464
HDI              0.895
Continent       Europe
Name: Italy, dtype: object

In [145]:
df.iloc[1] # .iloc selects a row by integer position (here the second row), returned as a Series

Population        44.445
GDP               233443
Surface Area    23423424
HDI                0.784
Continent         Europe
Name: France, dtype: object

In [146]:
df['Population'] # column level data for all the rows #vertical data filter

Canada     23.445
France     44.445
Germany    33.656
Italy      66.565
China      67.445
Name: Population, dtype: float64

In [153]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe
China,67.445,766767,879878,0.578,Asia


In [155]:
# to_frame() converts the selected column (a Series) into a one-column DataFrame
df['Population'].to_frame()

Unnamed: 0,Population
Canada,23.445
France,44.445
Germany,33.656
Italy,66.565
China,67.445


In [159]:
df[['Population','GDP']] # a list of column names selects multiple columns and returns a DataFrame

Unnamed: 0,Population,GDP
Canada,23.445,123456
France,44.445,233443
Germany,33.656,234234
Italy,66.565,324324
China,67.445,766767


In [161]:
df.iloc[0]

Population       23.445
GDP              123456
Surface Area     342343
HDI               0.556
Continent       America
Name: Canada, dtype: object

In [162]:
df.loc['China']

Population      67.445
GDP             766767
Surface Area    879878
HDI              0.578
Continent         Asia
Name: China, dtype: object

In [None]:
# Selecting several columns requires a *list* of names; a bare tuple such as
# df['Population', 'Continent'] is looked up as a single column key and fails.
# The result is already a DataFrame, so to_frame() is not needed.
df[['Population', 'Continent']]

In [168]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe
China,67.445,766767,879878,0.578,Asia


In [169]:
# Positional row slice via .iloc: rows at positions 1 and 2 are returned;
# the upper limit (3) is not included.
df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe


In [171]:
 df.loc['Canada':'Italy']  # label-based row slice via .loc: both end labels ('Canada' and 'Italy') are included

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.445,123456,342343,0.556,America
France,44.445,233443,23423424,0.784,Europe
Germany,33.656,234234,657567,0.556,Europe
Italy,66.565,324324,565464,0.895,Europe


In [174]:
# Rows 'Canada' through 'Italy' (inclusive) of the 'Population' column;
# passing the column as a one-element list yields a DataFrame directly.
df.loc['Canada':'Italy', ['Population']]

Unnamed: 0,Population
Canada,23.445
France,44.445
Germany,33.656
Italy,66.565
