In [31]:
import numpy as np
import pandas as pd

In [32]:
# Build a Series from the even integers 0, 2, 4, 6. No index is given, so
# pandas assigns the default positional labels 0..3.
s1 = pd.Series(np.arange(start=0, stop=7, step=2))
s1

0    0
1    2
2    4
3    6
dtype: int64

In [33]:
# Series with explicit integer labels (3, 5, 7) instead of the default
# 0..n-1 index.
myindex = [3, 5, 7]
price = [100, 200, 300]
s2 = pd.Series(data=price, index=myindex)
s2

3    100
5    200
7    300
dtype: int64

In [34]:
# Show the two parts of the Series on separate lines: first its labels
# (the index), then the underlying array of values.
print(s2.index, s2.values, sep="\n")

Int64Index([3, 5, 7], dtype='int64')
[100 200 300]


In [35]:
# Positional slice: rows at positions 2 and 3 (the stop position 4 is
# excluded). `.iloc` makes the positional meaning explicit.
s = pd.Series(list(range(6)))
s.iloc[2:4]

2    2
3    3
dtype: int64

In [36]:
# Element-wise multiplication: values are paired up by index label.
x = pd.Series([1, 2])
y = pd.Series([2, 4])
x.mul(y)

0    2
1    8
dtype: int64

In [37]:
# Series labelled by fruit name; selecting with a list of labels returns
# the entries in the order the labels are requested.
fruits = ["Orange", "Apple", "Grape"]
x = pd.Series(data=[20, 30, 40], index=fruits)
print(x.loc[["Apple", "Orange"]])

Apple     30
Orange    20
dtype: int64


# DataFrame

In [38]:
# One Series per city, each labelled by the years 2020-2022.
years = range(2020, 2023)
beijing = pd.Series(data=[20, 21, 19], index=years)
hongkong = pd.Series(data=[25, 26, 27], index=years)
singapore = pd.Series(data=[30, 29, 31], index=years)

# Concatenating along axis 0 (the default) stacks the Series end to end,
# so the result is still a Series and each year label appears three times.
citydf = pd.concat([beijing, hongkong, singapore], axis=0)
print(type(citydf), citydf, sep="\n")

<class 'pandas.core.series.Series'>
2020    20
2021    21
2022    19
2020    25
2021    26
2022    27
2020    30
2021    29
2022    31
dtype: int64


In [39]:
# Concatenating along the column axis places the Series side by side,
# aligned on their index. The result is a DataFrame whose columns get the
# default labels 0, 1, 2.
citydf2 = pd.concat([beijing, hongkong, singapore], axis="columns")
print(type(citydf2), citydf2, sep="\n")

<class 'pandas.core.frame.DataFrame'>
       0   1   2
2020  20  25  30
2021  21  26  29
2022  19  27  31


In [40]:
# Replace the column labels of citydf2 with the city names (this modifies
# citydf2 in place).
citydf2.columns = pd.Index(["Beijing", "Hongkong", "Singapore"])
print(citydf2)

      Beijing  Hongkong  Singapore
2020       20        25         30
2021       21        26         29
2022       19        27         31


In [41]:
# Create a DataFrame from a list of dictionaries: each dictionary is one
# row and its keys become the column labels. The second row has no
# "orange" key, so that cell is filled with NaN.
data = [
    {"apple": 50, "orange": 30, "grape": 80},
    {"apple": 50, "grape": 80},
]
fruits = pd.DataFrame(data=data)
print(fruits)

   apple  orange  grape
0     50    30.0     80
1     50     NaN     80


In [42]:
# Create a DataFrame from a dictionary of equal-length lists: each key is
# a column label and each list holds that column's values.
cities = {
    "country": ["China", "Japan", "Singapore"],
    "town": ["Beijing", "Tokyo", "Singapore"],
    "population": [2000, 1600, 600],
}
citydf3 = pd.DataFrame(data=cities)
print(citydf3)

     country       town  population
0      China    Beijing        2000
1      Japan      Tokyo        1600
2  Singapore  Singapore         600


In [43]:
# Same data as before, but with custom row labels in place of 0, 1, 2.
citydf3 = pd.DataFrame(
    data=cities,
    index=["first", "second", "third"],
)
print(citydf3)

          country       town  population
first       China    Beijing        2000
second      Japan      Tokyo        1600
third   Singapore  Singapore         600


In [44]:
# Keep only the "town" and "population" columns and use the country names
# as the row labels.
citydf4 = pd.DataFrame(
    data=cities,
    index=cities["country"],
    columns=["town", "population"],
)
print(citydf4)

                town  population
China        Beijing        2000
Japan          Tokyo        1600
Singapore  Singapore         600


# Data Processing

In [45]:
# Frame used by the selection examples: rows are labelled by country and
# "China" appears twice, so the index contains a duplicate label.
cities = {
    "Country": ["China", "China", "Thailand", "Japan", "Singapore"],
    "Town": ["Beijing", "Shanghai", "Bangkok", "Tokyo", "Singapore"],
    "Population": [2000, 2300, 900, 1600, 600],
}
df = pd.DataFrame(
    data=cities,
    index=cities["Country"],
    columns=["Town", "Population"],
)
print(df)

                Town  Population
China        Beijing        2000
China       Shanghai        2300
Thailand     Bangkok         900
Japan          Tokyo        1600
Singapore  Singapore         600


In [46]:
# The label 'China' matches two rows, so this lookup yields a Series of
# towns rather than a single value. `.at` is the single-value (scalar)
# accessor; `.loc` is the label-based accessor for lookups that may match
# several rows, so it is used here.
df.loc['China', 'Town']

China     Beijing
China    Shanghai
Name: Town, dtype: object

In [47]:
# Scalar lookup by integer position: row position 2, column position 0.
df.iat[2, 0]

'Bangkok'

In [54]:
# Select the row labelled "Singapore" with all of its columns; a single
# matching row comes back as a Series.
df.loc["Singapore", :]

Town          Singapore
Population          600
Name: Singapore, dtype: object

In [55]:
# Select several rows by label; they are returned in the order requested.
df.loc[["Japan", "Thailand"], :]

             Town  Population
Japan       Tokyo        1600
Thailand  Bangkok         900


In [56]:
# Label-based slices on rows and columns; with `.loc` both end labels
# ("Thailand" and "Population") are included in the result.
df.loc["China":"Thailand", "Town":"Population"]

              Town  Population
China      Beijing        2000
China     Shanghai        2300
Thailand   Bangkok         900


In [57]:
# Select the first row by integer position, with all of its columns.
df.iloc[0, :]

Town          Beijing
Population       2000
Name: China, dtype: object

In [58]:
# Boolean-mask selection: keep only the rows whose Population is above 1000.
df.loc[df["Population"] > 1000]

           Town  Population
China   Beijing        2000
China  Shanghai        2300
Japan     Tokyo        1600


# NumPy with pandas

In [59]:
# 3x3 frame of random integers drawn from [60, 100): one column per name,
# one row per label in `score`.
# NOTE: the random generator is not seeded here, so the numbers change on
# every run.
name = ["Frank", "Peter", "John"]
score = ["first", "second", "final"]
df = pd.DataFrame(
    data=np.random.randint(low=60, high=100, size=(3, 3)),
    index=score,
    columns=name,
)
print(df)

        Frank  Peter  John
first      89     61    77
second     83     64    87
final      74     61    63


In [61]:
# Adding two Series that contain missing values: with no fill value
# (the default), any position where either operand is NaN stays NaN, so
# only position 2 (5 + 8) yields a number.
s1 = pd.Series([1, np.nan, 5])
s2 = pd.Series([np.nan, 6, 8])
x = s1.add(s2, fill_value=None)
print(x)

0     NaN
1     NaN
2    13.0
dtype: float64


In [63]:
# Print the missing-value mask of s1 followed by its complement:
# isna() is True where a value is missing, notna() is True where present.
print(s1.isna(), s1.notna(), sep="\n")

0    False
1     True
2    False
dtype: bool
0     True
1    False
2     True
dtype: bool
