In [2]:
import numpy as np
import pandas as pd

# SERIES

In [12]:
# Build a Series from a plain list. No index is supplied, so pandas labels
# the seven elements 0..6.
serr = pd.Series(data=[34, 23, 43, 55, 64, 43, 22])
serr

0    34
1    23
2    43
3    55
4    64
5    43
6    22
dtype: int64

In [13]:
# .values exposes the Series' data as a NumPy array (the index is not included).
serr.values

array([34, 23, 43, 55, 64, 43, 22], dtype=int64)

In [20]:
# The array from .values takes ordinary NumPy slicing: positions 1, 2 and 3
# (the stop position 4 is excluded).
serr.values[1:4] 

array([23, 43, 55], dtype=int64)

In [21]:
# serr was created without an index, so pandas supplied a default RangeIndex.
serr.index

RangeIndex(start=0, stop=7, step=1)

In [24]:
# An Index can itself be indexed by position; position 2 of the default
# RangeIndex holds the label 2.
serr.index[2]

2

In [27]:
# Same data as serr, but with an explicit single-letter label per element.
serr2 = pd.Series(
    data=[34, 23, 43, 55, 64, 43, 22],
    index=list("acdfgtq"),
)
serr2

a    34
c    23
d    43
f    55
g    64
t    43
q    22
dtype: int64

In [28]:
# serr2 was given explicit string labels, so its index is a regular Index
# holding those labels.
serr2.index

Index(['a', 'c', 'd', 'f', 'g', 't', 'q'], dtype='object')

In [29]:
# City name -> value mapping used to build a Series in the next cell.
dictt = dict(amsterdam=4, berlin=6)

In [30]:
# A dict converts directly to a Series: the keys become the index labels and
# the values become the data.
serr3 = pd.Series(dictt)
serr3

amsterdam    4
berlin       6
dtype: int64

In [33]:
# The index holds the keys of the source dict.
serr3.index

Index(['amsterdam', 'berlin'], dtype='object')

In [31]:
# keys() returns the index labels - the same result as serr3.index.
serr3.keys()

Index(['amsterdam', 'berlin'], dtype='object')

In [34]:
# Membership on a Series checks the index labels; "amsterdam" is one of them.
"amsterdam" in serr3

True

In [35]:
# "tokyo" is not an index label of serr3, so this is False.
"tokyo" in serr3

False

In [36]:
# To test membership among the data rather than the labels, check against .values.
4 in serr3.values

True

# DATAFRAME

In [37]:
# Revenue per city; the city names are the index labels, kept in the order
# in which they are written here.
city_revenues = pd.Series({"Amsterdam": 4200, "Toronto": 8000, "Tokyo": 6500})
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [38]:
# Employee count per city. Toronto is deliberately absent, so it has no entry.
city_employee_count = pd.Series([5, 8], index=["Amsterdam", "Tokyo"])
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [39]:
# Combine the two Series into a DataFrame, one column per Series. Rows are
# aligned on index label: Toronto has a revenue but no employee count, so
# that cell is NaN and the employee column is shown as float.
city_data = pd.DataFrame({
            "revenue": city_revenues, 
            "employee": city_employee_count
            })
city_data

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [49]:
# .axes lists both axis indexes: the row labels first, then the column labels.
city_data.axes

[Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object'),
 Index(['revenue', 'employee'], dtype='object')]

In [58]:
# The container returned by .axes is a plain Python list.
type(city_data.axes)

list

In [52]:
# Element 1 of .axes is the column index.
city_data.axes[1]

Index(['revenue', 'employee'], dtype='object')

In [50]:
# Element 0 of .axes is the row index.
city_data.axes[0]

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [40]:
# .columns gives the column labels (the same Index as city_data.axes[1]).
city_data.columns

Index(['revenue', 'employee'], dtype='object')

In [54]:
# keys() on a DataFrame returns the column labels, like .columns.
city_data.keys()

Index(['revenue', 'employee'], dtype='object')

In [42]:
# .index gives the row labels (the same Index as city_data.axes[0]).
city_data.index

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [45]:
# .values returns the data as a 2-D NumPy array. The missing employee count
# for Toronto appears as nan, and the whole array is float.
city_data.values

array([[4.2e+03, 5.0e+00],
       [6.5e+03, 8.0e+00],
       [8.0e+03,     nan]])

In [63]:
# Display city_data.
city_data

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [65]:
# Membership on a DataFrame checks the column labels, so a row label such as
# "Amsterdam" is not found.
"Amsterdam" in city_data

False

In [66]:
# To test for a row label, check against .index explicitly.
"Amsterdam" in city_data.index

True

In [64]:
# "revenue" is a column label, so it is found.
"revenue" in city_data

True

# ACCESSING SERIES AND DATAFRAME

In [67]:
# Display city_revenues.
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [69]:
# Look up a value by its index label.
city_revenues['Amsterdam']

4200

In [68]:
# Positional access goes through .iloc. city_revenues has string labels, and
# a bare integer key inside [] relied on a positional fallback that pandas
# has deprecated: integer keys are to be treated as labels, which would make
# `city_revenues[0]` a KeyError here.
city_revenues.iloc[0]

4200

In [81]:
# .iloc is purely positional: 0 is the first element regardless of the labels.
city_revenues.iloc[0]

4200

In [90]:
# An integer slice inside [] is positional on this string-labelled Series:
# everything before position 1, i.e. only the first element.
city_revenues[:1]

Amsterdam    4200
dtype: int64

In [88]:
# Positional slice from position 1 to the end.
city_revenues[1:]

Toronto    8000
Tokyo      6500
dtype: int64

### .loc and .iloc

#### `SERIES`

In [92]:
# Integer labels that deliberately differ from the positions 0..4, so that
# label-based and position-based lookups give visibly different results.
colors = pd.Series({
    1: "red",
    2: "purple",
    3: "blue",
    5: "green",
    8: "yellow",
})
colors

1       red
2    purple
3      blue
5     green
8    yellow
dtype: object

In [93]:
# .loc selects by label: the element whose index label is 8.
colors.loc[8]

'yellow'

In [94]:
# .iloc selects by position: position 4 is the fifth (last) element.
colors.iloc[4]

'yellow'

In [98]:
# With an integer index, plain [] looks the key up as a label (like .loc[8]);
# this 5-element Series has no position 8.
colors[8]

'yellow'

In [95]:
# Positional slice: positions 1 and 2 (the stop position 3 is excluded).
colors.iloc[1:3]

2    purple
3      blue
dtype: object

In [99]:
# Positional slice with a step: start 0, stop 4 (excluded), step 3,
# which selects positions 0 and 3.
colors.iloc[0:4:3]

1      red
5    green
dtype: object

In [103]:
# A list of positions selects exactly those elements.
colors.iloc[[0,1,4]]

1       red
2    purple
8    yellow
dtype: object

In [100]:
# Label slice: unlike .iloc, the stop label (8) is included in the result.
colors.loc[3:8]

3      blue
5     green
8    yellow
dtype: object

In [101]:
# A list of labels selects exactly those elements.
colors.loc[[1, 3, 5]]

1      red
3     blue
5    green
dtype: object

#### `DATAFRAME`

In [104]:
# Display city_data.
city_data

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [107]:
# A list of column labels inside [] selects those columns and returns a DataFrame.
city_data[['revenue','employee']]

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [112]:
# .loc with a single row label returns that row as a Series. The row mixes an
# int column and a float column, so its values are shown as float.
city_data.loc['Amsterdam']

revenue     4200.0
employee       5.0
Name: Amsterdam, dtype: float64

In [114]:
# Label slice over the rows; the stop label 'Tokyo' is included.
city_data.loc['Amsterdam':'Tokyo']

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0


In [118]:
# Row label slice plus a single column label: returns the revenue column for
# those rows as a Series.
city_data.loc['Amsterdam':'Tokyo', 'revenue']

Amsterdam    4200
Tokyo        6500
Name: revenue, dtype: int64

In [113]:
# .iloc with a single position returns the first row as a Series.
city_data.iloc[0]

revenue     4200.0
employee       5.0
Name: Amsterdam, dtype: float64

In [120]:
# Positional row slice: rows 0 and 1 (the stop position 2 is excluded).
city_data.iloc[0:2]

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0


In [124]:
# Positional rows 0 and 1 of column 0 (revenue), returned as a Series.
city_data.iloc[0:2, 0]

Amsterdam    4200
Tokyo        6500
Name: revenue, dtype: int64

# COMBINING DATASETS

In [125]:
# Two additional cities, given row by row: one [revenue, employee_count] pair
# per city, in the same order as the index labels.
further_city_data = pd.DataFrame(
    data=[[7000, 2], [3400, 2]],
    index=["New York", "Barcelona"],
    columns=["revenue", "employee_count"],
)

In [126]:
# Display further_city_data.
further_city_data

Unnamed: 0,revenue,employee_count
New York,7000,2
Barcelona,3400,2


In [127]:
# Display city_data.
city_data

Unnamed: 0,revenue,employee
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [145]:
# Stack the two frames on top of each other (concat's default axis).
# sort=False keeps the columns in order of first appearance.
# NOTE(review): city_data names its column 'employee' while further_city_data
# uses 'employee_count', so the result carries two half-empty columns instead
# of one - confirm whether the two names were meant to match.
all_city_data = pd.concat([city_data,further_city_data], sort=False)
all_city_data

Unnamed: 0,revenue,employee,employee_count
Amsterdam,4200,5.0,
Tokyo,6500,8.0,
Toronto,8000,,
New York,7000,,2.0
Barcelona,3400,,2.0


In [138]:
# Same concatenation with sort=True, which orders the columns alphabetically.
# The result is only displayed, not assigned: rebinding `all_city_data` here
# would leave it in alphabetical column order on a top-to-bottom run, whereas
# the later cells were recorded against the sort=False frame.
pd.concat([city_data, further_city_data], sort=True)

Unnamed: 0,employee,employee_count,revenue
Amsterdam,5.0,,4200
Tokyo,8.0,,6500
Toronto,,,8000
New York,,2.0,7000
Barcelona,,2.0,3400


In [139]:
# Country of each city plus a 0/1 flag column named "capital", indexed by
# city name.
city_countries = pd.DataFrame(
    data=dict(
        country=["Holland", "Japan", "Holland", "Canada", "Spain"],
        capital=[1, 1, 0, 0, 0],
    ),
    index=["Amsterdam", "Tokyo", "Rotterdam", "Toronto", "Barcelona"],
)

In [140]:
# Display city_countries.
city_countries

Unnamed: 0,country,capital
Amsterdam,Holland,1
Tokyo,Japan,1
Rotterdam,Holland,0
Toronto,Canada,0
Barcelona,Spain,0


In [146]:
# Display all_city_data.
all_city_data

Unnamed: 0,revenue,employee,employee_count
Amsterdam,4200,5.0,
Tokyo,6500,8.0,
Toronto,8000,,
New York,7000,,2.0
Barcelona,3400,,2.0


In [148]:
# axis=1 places the frames side by side, aligning rows on index label. The
# default outer join keeps every label found in either frame and fills the
# gaps with NaN; sort=True orders the row labels.
cities = pd.concat([all_city_data,city_countries], axis=1, sort=True)
cities

Unnamed: 0,revenue,employee,employee_count,country,capital
Amsterdam,4200.0,5.0,,Holland,1.0
Barcelona,3400.0,,2.0,Spain,0.0
New York,7000.0,,2.0,,
Rotterdam,,,,Holland,0.0
Tokyo,6500.0,8.0,,Japan,1.0
Toronto,8000.0,,,Canada,0.0


In [149]:
# join="inner" keeps only the row labels present in both frames, so New York
# and Rotterdam are dropped. This rebinds `cities`, replacing the
# outer-joined frame built in the previous cell.
cities = pd.concat([all_city_data,city_countries], axis=1, sort=True, join="inner")
cities

Unnamed: 0,revenue,employee,employee_count,country,capital
Amsterdam,4200,5.0,,Holland,1
Barcelona,3400,,2.0,Spain,0
Tokyo,6500,8.0,,Japan,1
Toronto,8000,,,Canada,0


# MERGE

In [150]:
# Per-country facts, indexed by country name so they can be matched against
# the "country" column of the city table.
countries = pd.DataFrame(
    data=dict(
        population_millions=[17, 127, 37],
        continent=["Europe", "Asia", "North America"],
    ),
    index=["Holland", "Japan", "Canada"],
)

In [151]:
# Display countries.
countries

Unnamed: 0,population_millions,continent
Holland,17,Europe
Japan,127,Asia
Canada,37,North America


In [152]:
# Display cities (the inner-joined frame).
cities

Unnamed: 0,revenue,employee,employee_count,country,capital
Amsterdam,4200,5.0,,Holland,1
Barcelona,3400,,2.0,Spain,0
Tokyo,6500,8.0,,Japan,1
Toronto,8000,,,Canada,0


In [161]:
# Attach country facts to each city: the "country" column of cities is
# matched against the index of countries. merge defaults to an inner join,
# so Barcelona is dropped because "Spain" is not in countries.
pd.merge(cities, countries, left_on="country", right_index=True)

Unnamed: 0,revenue,employee,employee_count,country,capital,population_millions,continent
Amsterdam,4200,5.0,,Holland,1,17,Europe
Tokyo,6500,8.0,,Japan,1,127,Asia
Toronto,8000,,,Canada,0,37,North America


In [158]:
# how="left" keeps every row of cities. Barcelona has no matching country, so
# its country columns are NaN, which is why population_millions is shown as
# float here.
pd.merge(
    cities,
    countries,
    left_on="country",
    right_index=True,
    how="left"
)

Unnamed: 0,revenue,employee,employee_count,country,capital,population_millions,continent
Amsterdam,4200,5.0,,Holland,1,17.0,Europe
Barcelona,3400,,2.0,Spain,0,,
Tokyo,6500,8.0,,Japan,1,127.0,Asia
Toronto,8000,,,Canada,0,37.0,North America
