In [None]:
import pandas as pd 
import numpy as np

**Pandas Series**

In [None]:
# Build a Series from a plain list of floats; with no index given,
# pandas labels the values 0..6.
g7_population = pd.Series(
    data=[45.467, 25.457, 95.759, 154.78, 159.795, 65.45, 87.24]
)

In [None]:
# Display the Series: index labels on the left, values on the right, dtype in
# the footer. The values themselves are stored in a NumPy array.
g7_population

0     45.467
1     25.457
2     95.759
3    154.780
4    159.795
5     65.450
6     87.240
dtype: float64

In [None]:
# Attach a descriptive name to the Series; it is shown in the repr footer.
g7_population.name ="G7 Population In Millions"

In [None]:
# Re-display the Series: the footer now carries the Name set in the previous cell.
g7_population

0     45.467
1     25.457
2     95.759
3    154.780
4    159.795
5     65.450
6     87.240
Name: G7 Population In Millions, dtype: float64

In [None]:
# .values exposes the data as a NumPy ndarray, showing the Series is NumPy-backed.
g7_population.values

array([ 45.467,  25.457,  95.759, 154.78 , 159.795,  65.45 ,  87.24 ])

In [None]:
# With the default integer index, the key 0 is an index label that happens to
# coincide with the first position.
g7_population[0]

45.467

In [None]:
# Same lookup for the label 1 (the second value while the index is still 0..6).
g7_population[1]

25.457

In [None]:
# Inspect the index: no labels were supplied at creation, so it is a default
# integer range.
g7_population.index

RangeIndex(start=0, stop=7, step=1)

In [None]:
# Replace the default integer labels with country names, one label per value,
# in the same order as the data.
g7_population.index = [
    'Canada', 'France', 'Germany', 'United States',
    'Japan', 'Italy', 'United Kingdom',
]

In [None]:
# Re-display the Series: rows are now labelled by country instead of 0..6.
g7_population

Canada             45.467
France             25.457
Germany            95.759
United States     154.780
Japan             159.795
Italy              65.450
United Kingdom     87.240
Name: G7 Population In Millions, dtype: float64

In [None]:
# The index now holds country names, so select the first element by position
# explicitly with .iloc. A bare integer key on a label-indexed Series relies on
# a positional fallback that recent pandas versions deprecate.
g7_population.iloc[0]    # same element as g7_population['Canada']

45.467

In [None]:
# Overwrite the first element (Canada) by position. Use .iloc: with string
# labels, `g7_population[0] = ...` depends on a deprecated positional fallback
# and is slated to be interpreted as assignment to the *label* 0 instead.
g7_population.iloc[0] = 54.457

In [None]:
# Another way to create a Series: pass a dict. The keys become the index labels
# (in insertion order), the values become the data, and `name` labels the Series.
pd.Series({
    'Canada': 45.467,
    'France': 25.457,
    'Germany':80.94,
    'Italy':65.450,
    'United States': 159.795,
    'United Kingdom':87.240,
    'Japan':95.759
}, name = "G7 Population In millions")

Canada             45.467
France             25.457
Germany            80.940
Italy              65.450
United States     159.795
United Kingdom     87.240
Japan              95.759
Name: G7 Population In millions, dtype: float64

**Creating a Series from another Series**

In [None]:
# Building a Series from an existing one with an explicit `index` picks out
# those labels; a label missing from the source ('South Korea') becomes NaN.
pd.Series(g7_population, index = ['France','Germany','Italy','South Korea'])

France         25.457
Germany        95.759
Italy          65.450
South Korea       NaN
Name: G7 Population In Millions, dtype: float64

In [None]:
# Display the source Series again; the previous expression built a new Series
# and was not assigned back to g7_population.
g7_population

Canada             45.467
France             25.457
Germany            95.759
United States     154.780
Japan             159.795
Italy              65.450
United Kingdom     87.240
dtype: float64

**Indexing**

In [None]:
# Label-based lookup: returns the value stored under the 'Japan' index label.
g7_population['Japan']

159.795

In [None]:
# Two explicit ways to reach the first element of a label-indexed Series.
# A bare `g7_population[0]` is avoided: integer keys on a string index rely on
# a deprecated positional fallback.
print(g7_population.iloc[0])       # by position
print(g7_population['Canada'])     # by label

45.467
45.467


In [None]:
# Negative positions count from the end, so -1 selects the last element.
g7_population.iloc[-1]

87.24

In [None]:
# Select several elements by position. .iloc is required here: a list of
# integers passed to plain [] on a string-labelled Series depends on the
# deprecated positional fallback.
g7_population.iloc[[0, 1]]

Canada    54.457
France    25.457
Name: G7 Population In Millions, dtype: float64

In [None]:
# A list of labels returns a sub-Series, in the order the labels were requested.
g7_population[['Japan', 'United States']]

Japan            159.795
United States    154.780
Name: G7 Population In Millions, dtype: float64

In [None]:
# A list of integer positions with .iloc returns the elements at those positions.
g7_population.iloc[[0, 1]]

Canada    54.457
France    25.457
Name: G7 Population In Millions, dtype: float64

In [None]:
# Label-based slicing with .loc includes BOTH endpoints: 'Japan' is part of the result.
g7_population.loc['Canada':'Japan']   
# Shorthand with the same result: g7_population['Canada':'Japan']

Canada            45.467
France            25.457
Germany           95.759
United States    154.780
Japan            159.795
Name: G7 Population In Millions, dtype: float64

In [None]:
# Positional slicing with .iloc follows Python rules: the stop position is
# excluded, so 0:5 returns positions 0 through 4.
g7_population.iloc[0:5] 
# Shorthand with the same result: g7_population[0:5]

Canada            45.467
France            25.457
Germany           95.759
United States    154.780
Japan            159.795
Name: G7 Population In Millions, dtype: float64

**Filtering**

In [None]:
# Comparing a Series with a scalar is element-wise and yields a boolean Series
# with the same index.
g7_population > 70

Canada            False
France            False
Germany            True
United States      True
Japan              True
Italy             False
United Kingdom     True
dtype: bool

In [None]:
# Use the boolean Series as a mask: only rows where the condition is True are kept.
g7_population[g7_population > 70]

Germany            95.759
United States     154.780
Japan             159.795
United Kingdom     87.240
Name: G7 Population In Millions, dtype: float64

In [None]:
# Arithmetic mean of all values in the Series.
g7_population.mean()

90.564

In [None]:
# The mask can be built from a statistic of the Series itself: keep only the
# countries whose value is above the mean.
g7_population[ g7_population > g7_population.mean()]

Germany           95.759
United States    154.780
Japan            159.795
dtype: float64

In [None]:
# Standard deviation of the values (pandas uses the sample formula, ddof=1, by default).
g7_population.std()

51.41017550381766

In [None]:
# Scalar arithmetic is applied to every element and returns a new Series; the
# result is not assigned, so g7_population itself is unchanged.
g7_population * 1_000_000

Canada             45467000.0
France             25457000.0
Germany            95759000.0
United States     154780000.0
Japan             159795000.0
Italy              65450000.0
United Kingdom     87240000.0
dtype: float64

In [None]:
# Print the lower and upper edges of a band one standard deviation wide,
# centred on the mean.
center = g7_population.mean()
half_std = g7_population.std() / 2
print(center - half_std)
print(center + half_std)

64.85891224809116
116.26908775190883


In [None]:
# Keep the countries whose value lies above (mean - std/2).
# The original expression OR-ed in a second test, `> mean + std/2`. That clause
# can never add rows: anything above the upper bound is already above the
# lower one, so it has been dropped without changing the result.
# NOTE(review): if the intent was to select values INSIDE the band, the filter
# should be `(s > lower) & (s < upper)`; for values OUTSIDE the band,
# `(s < lower) | (s > upper)`. Confirm which was meant.
lower_bound = g7_population.mean() - g7_population.std() / 2
g7_population[g7_population > lower_bound]

Germany            95.759
United States     154.780
Japan             159.795
Italy              65.450
United Kingdom     87.240
dtype: float64

In [None]:
# Display the Series again: the filters above returned new objects and did not modify it.
g7_population

Canada             45.467
France             25.457
Germany            95.759
United States     154.780
Japan             159.795
Italy              65.450
United Kingdom     87.240
dtype: float64

In [None]:
# NumPy functions apply element-wise to a Series and return a Series that keeps
# the original index labels.
np.log(g7_population)

Canada            3.816987
France            3.236991
Germany           4.561835
United States     5.042005
Japan             5.073892
Italy             4.181286
United Kingdom    4.468663
dtype: float64

In [None]:
# Mean of the label slice Canada..Japan (label slices include both endpoints),
# printed with four decimal places.
canada_to_japan = g7_population.loc['Canada':'Japan']
print(format(canada_to_japan.mean(), '.4f'))

96.2516


In [None]:
# Show the current values before the in-place modifications in the next cells.
g7_population

Canada             45.467
France             25.457
Germany            95.759
United States     154.780
Japan             159.795
Italy              65.450
United Kingdom     87.240
dtype: float64

In [None]:
# Assign by label: overwrites the value stored under 'United Kingdom' in place.
g7_population['United Kingdom'] = 90.7

In [None]:
# Assign by position: position 1 is the second element (France in this index order).
g7_population.iloc[1] = 45.457

In [None]:
# Assign through a boolean mask: every value currently below 90 is overwritten
# with 45.457 in place.
g7_population[g7_population < 90] = 45.457  

In [None]:
# Display the Series after the three in-place assignments above.
g7_population

Canada             45.457
France             45.457
Germany            95.759
United States     154.780
Japan             159.795
Italy              45.457
United Kingdom     90.700
dtype: float64

In [None]:
# Label lookup; Japan was above 90, so the masked assignment left it untouched.
g7_population['Japan']

159.795

In [None]:
# First two elements by position. .iloc makes the positional intent explicit;
# a list of integers in plain [] on a string-labelled Series depends on the
# deprecated positional fallback.
g7_population.iloc[[0, 1]]

Canada    45.457
France    45.457
Name: G7 Population In Millions, dtype: float64

**DataFrame**

In [None]:
# Build a one-row DataFrame from a dict: each key is a column name and each
# list holds that column's values.
fruits = pd.DataFrame({'Apples': [30], 'Bananas': [21]})
fruits

Unnamed: 0,Apples,Bananas
0,30,21


In [None]:
# Same frame built from a list of rows, with the column names given separately.
fruits = pd.DataFrame([[30, 21]], columns=["Apples", "Bananas"])
fruits

Unnamed: 0,Apples,Bananas
0,30,21


In [None]:
# Two rows of data; `index` supplies custom row labels instead of 0, 1.
fruits = pd.DataFrame(
    [[30, 21], [41, 34]],
    index=["2017 Sales", "2018 Sales"],
    columns=["Apples", "Bananas"],
)
fruits

Unnamed: 0,Apples,Bananas
2017 Sales,30,21
2018 Sales,41,34


In [None]:
# Column-oriented construction (dict of lists) combined with custom row labels.
fruits = pd.DataFrame(
    {'Apples': [30, 41], 'Bananas': [21, 34]},
    index=["2017 Sales", "2018 Sales"],
)
fruits

Unnamed: 0,Apples,Bananas
2017 Sales,30,21
2018 Sales,41,34


In [None]:
# Unnamed Series keyed by country; the dict keys become the index labels.
series = pd.Series({
    'Canada': 45.467, 'France': 25.457, 'Germany': 80.94, 'Italy': 65.450,
    'United States': 159.795, 'United Kingdom': 87.240, 'Japan': 95.759,
})

In [None]:
# Turn the Series into a one-column DataFrame, keeping the country index.
# Series.to_frame(name=...) always uses the Series' values for the column.
# pd.DataFrame(data=series, columns=[...]) only does so while the Series is
# unnamed; if the Series carries a different name, the requested column is
# silently filled with NaN.
dataframe = series.to_frame(name='Population(in Million)')

In [None]:
# Move the country labels out of the index into a regular "Country" column and
# fall back to a default 0..6 integer index.
# NOTE(review): this cell looks non-idempotent — re-running it would try to
# insert a second "Country" column; confirm before re-executing out of order.
dataframe = dataframe.reset_index(names="Country")
dataframe

Unnamed: 0,Country,Population(in Million)
0,Canada,45.467
1,France,25.457
2,Germany,80.94
3,Italy,65.45
4,United States,159.795
5,United Kingdom,87.24
6,Japan,95.759


In [None]:
# Attribute-style column access; .values returns the column as a NumPy array
# (object dtype, because the entries are strings).
dataframe.Country.values

array(['Canada', 'France', 'Germany', 'Italy', 'United States',
       'United Kingdom', 'Japan'], dtype=object)

In [None]:
# GDP per country, in trillions of US dollars (2021), kept as two parallel lists.
data = {
    'Country': [
        'Canada', 'France', 'Germany', 'Italy',
        'United States', 'United Kingdom', 'Japan',
    ],
    'GDP': [1.647, 2.716, 4.322, 2.111, 21.433, 2.856, 5.081],
}
# Pair the lists up: countries become the index labels, GDP figures the values.
series = pd.Series(
    data=data['GDP'],
    index=data['Country'],
    name='GDP values are in trillions of US dollars (2021)',
)
series

Canada             1.647
France             2.716
Germany            4.322
Italy              2.111
United States     21.433
United Kingdom     2.856
Japan              5.081
Name: GDP values are in trillions of US dollars (2021), dtype: float64

In [None]:
# Discard the country labels (drop=True throws them away rather than turning
# them into a column) so the Series gets a plain 0..6 index.
series = series.reset_index(drop=True)
series

0     1.647
1     2.716
2     4.322
3     2.111
4    21.433
5     2.856
6     5.081
Name: GDP values are in trillions of US dollars (2021), dtype: float64

In [None]:
# Append the GDP Series as a new column (axis=1). Both objects carry the same
# 0..6 index, so the values line up row by row; the Series' name becomes the
# column header.
# NOTE(review): the pairing is correct only because both were built with the
# countries in the same order — the country labels themselves are not used.
# Re-running this cell would append the GDP column a second time.
dataframe = pd.concat([dataframe, series], axis=1)
dataframe

Unnamed: 0,Country,Population(in Million),GDP values are in trillions of US dollars (2021)
0,Canada,45.467,1.647
1,France,25.457,2.716
2,Germany,80.94,4.322
3,Italy,65.45,2.111
4,United States,159.795,21.433
5,United Kingdom,87.24,2.856
6,Japan,95.759,5.081


In [None]:
# .loc slices by label and includes the end label, so 0:5 returns 6 rows (labels 0..5).
dataframe.loc[0:5, ['Country', 'Population(in Million)']]

Unnamed: 0,Country,Population(in Million)
0,Canada,45.467
1,France,25.457
2,Germany,80.94
3,Italy,65.45
4,United States,159.795
5,United Kingdom,87.24


In [None]:
# .iloc slices by position and excludes the stop, so 0:5 returns 5 rows
# (positions 0..4); columns are chosen by position as well.
dataframe.iloc[0:5, [0, 1]]

Unnamed: 0,Country,Population(in Million)
0,Canada,45.467
1,France,25.457
2,Germany,80.94
3,Italy,65.45
4,United States,159.795


In [None]:
# Convert the population column from millions to absolute head-count.
# The column is selected by name rather than by position so the cell keeps
# working if columns are added or reordered.
population_series = dataframe['Population(in Million)'] * (10**6)
population_series.name = 'population_series'
population_series

0     45467000.0
1     25457000.0
2     80940000.0
3     65450000.0
4    159795000.0
5     87240000.0
6     95759000.0
Name: population_series, dtype: float64

In [None]:
# Convert the GDP column from trillions of US dollars to plain US dollars.
# The column is selected by name rather than by position so the cell keeps
# working if columns are added or reordered.
gdpseries = dataframe['GDP values are in trillions of US dollars (2021)'] * (10**12)
gdpseries.name = 'gdpseries'
gdpseries

0    1.647000e+12
1    2.716000e+12
2    4.322000e+12
3    2.111000e+12
4    2.143300e+13
5    2.856000e+12
6    5.081000e+12
Name: gdpseries, dtype: float64

In [None]:
# Element-wise division of the two Series (aligned on their shared 0..6 index)
# gives US dollars per person; the result is stored as a new column.
dataframe['gdp_per_capita(USD$)'] = gdpseries / population_series

In [None]:
# Display the frame, now including the derived gdp_per_capita(USD$) column.
dataframe

Unnamed: 0,Country,Population(in Million),GDP values are in trillions of US dollars (2021),gdp_per_capita(USD$)
0,Canada,45.467,1.647,36224.074604
1,France,25.457,2.716,106689.712063
2,Germany,80.94,4.322,53397.578453
3,Italy,65.45,2.111,32253.628724
4,United States,159.795,21.433,134128.10163
5,United Kingdom,87.24,2.856,32737.276479
6,Japan,95.759,5.081,53060.286762


In [None]:
# Select the 4th column as a one-column DataFrame (note the inner list) and ask
# for the index label of its largest value; the result is a Series keyed by
# column name.
dataframe.iloc[:, [3]].idxmax()

gdp_per_capita(USD$)    4
dtype: int64

In [None]:
# Row with the highest gdp_per_capita(USD$).
# idxmax() returns an index *label*, so the row must be looked up with the
# label-based .loc. Feeding that label to the positional .iloc only worked
# because the default 0..6 labels happen to equal the row positions.
# The inner list keeps the result a one-row DataFrame.
dataframe.loc[[dataframe['gdp_per_capita(USD$)'].idxmax()]]

Unnamed: 0,Country,Population(in Million),GDP values are in trillions of US dollars (2021),gdp_per_capita(USD$)
4,United States,159.795,21.433,134128.10163
