In [5]:
import numpy as np
import pandas as pd

def make_random_df(index, columns, seed):
    """Build a DataFrame of reproducible random integers between 1 and 100.

    One array of ``len(index)`` values is drawn (with replacement) from
    ``range(1, 101)`` for each entry of ``columns``, in the order given.

    Parameters
    ----------
    index : sized sequence
        Row labels of the result; ``len(index)`` fixes the number of rows.
    columns : iterable
        Column labels of the result.
    seed : int
        Seed for the random generator; the same seed gives the same values.

    Returns
    -------
    pandas.DataFrame
        Frame with one column per label in ``columns`` and ``index`` as its
        row labels.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.choice, but leaves the global generator untouched,
    # so calling this helper does not disturb random draws made elsewhere.
    rng = np.random.RandomState(seed)
    n_rows = len(index)
    data = {column: rng.choice(range(1, 101), n_rows) for column in columns}
    # Passing the index to the constructor also handles an empty `columns`,
    # where assigning a non-empty index to a 0-row frame would raise.
    return pd.DataFrame(data, index=index)

# Two frames with identical columns but different row labels (1-2 and 1-4),
# generated from different seeds.
columns = ["apple", "orange", "banana"]
df_data1 = make_random_df(range(1, 3), columns, 0)
df_data2 = make_random_df(range(1, 5), columns, 1)

# Show both inputs, each followed by a blank line.
print(df_data1, df_data2, sep="\n\n", end="\n\n")

# Stacking along the index keeps every row, so labels 1 and 2 occur twice.
df1 = pd.concat([df_data1, df_data2], axis="index")
# Joining along the columns aligns on row labels; rows 3 and 4 are absent
# from df_data1 and therefore show NaN on its side.
df2 = pd.concat([df_data1, df_data2], axis="columns")

print(df1, df2, sep="\n\n")



   apple  orange  banana
1     45      65      68
2     48      68      10

   apple  orange  banana
1     38      76      17
2     13       6       2
3     73      80      77
4     10      65      72

   apple  orange  banana
1     45      65      68
2     48      68      10
1     38      76      17
2     13       6       2
3     73      80      77
4     10      65      72

   apple  orange  banana  apple  orange  banana
1   45.0    65.0    68.0     38      76      17
2   48.0    68.0    10.0     13       6       2
3    NaN     NaN     NaN     73      80      77
4    NaN     NaN     NaN     10      65      72


In [6]:
# Frames that differ in both column names and row labels: only "orange" and
# "banana" are common columns, and only labels 1 and 3 are common rows.
columns1 = ["apple", "orange", "banana"]
columns2 = ["orange", "kiwi", "banana"]

df_data1 = make_random_df(range(1, 5), columns1, 0)
df_data2 = make_random_df(np.arange(1, 8, 2), columns2, 1)  # labels 1, 3, 5, 7

# Show both inputs, each followed by a blank line.
print(df_data1, df_data2, sep="\n\n", end="\n\n")

# Row-wise concat keeps the union of the columns; column-wise concat keeps
# the union of the row labels. Positions without a source value become NaN.
df1 = pd.concat([df_data1, df_data2], axis="index")
df2 = pd.concat([df_data1, df_data2], axis="columns")

print(df1, df2, sep="\n\n")



   apple  orange  banana
1     45      68      37
2     48      10      88
3     65      84      71
4     68      22      89

   orange  kiwi  banana
1      38    76      17
3      13     6       2
5      73    80      77
7      10    65      72

   apple  orange  banana  kiwi
1   45.0      68      37   NaN
2   48.0      10      88   NaN
3   65.0      84      71   NaN
4   68.0      22      89   NaN
1    NaN      38      17  76.0
3    NaN      13       2   6.0
5    NaN      73      77  80.0
7    NaN      10      72  65.0

   apple  orange  banana  orange  kiwi  banana
1   45.0    68.0    37.0    38.0  76.0    17.0
2   48.0    10.0    88.0     NaN   NaN     NaN
3   65.0    84.0    71.0    13.0   6.0     2.0
4   68.0    22.0    89.0     NaN   NaN     NaN
5    NaN     NaN     NaN    73.0  80.0    77.0
7    NaN     NaN     NaN    10.0  65.0    72.0


In [7]:
columns = ["apple", "orange", "banana"]
df_data1 = make_random_df(range(1, 5), columns, 0)
df_data2 = make_random_df(range(1, 5), columns, 1)

# keys= tags each input frame, giving the result two-level column labels
# ("X" / "Y" on top, the fruit names underneath).
df = pd.concat([df_data1, df_data2], axis="columns", keys=["X", "Y"])
# A (key, column) tuple picks out a single column under those labels.
Y_banana = df[("Y", "banana")]

print(df, Y_banana, sep="\n\n")



      X                   Y              
  apple orange banana apple orange banana
1    45     68     37    38     76     17
2    48     10     88    13      6      2
3    65     84     71    73     80     77
4    68     22     89    10     65     72

1    17
2     2
3    77
4    72
Name: (Y, banana), dtype: int32


In [16]:
# Two price tables that share four fruits; the fifth row is "kiwi" in the
# first table and "mango" in the second.
data1 = {
    "fruits": ["apple", "orange", "banana", "strawberry", "kiwi"],
    "year": [2001, 2002, 2001, 2008, 2007],
    "price": [150, 120, 100, 2510, 3000],
}
df1 = pd.DataFrame(data1)

data2 = {
    "fruits": ["apple", "orange", "banana", "strawberry", "mango"],
    "year": [2001, 2002, 2001, 2008, 2007],
    "price": [150, 120, 100, 2510, 3000],
}
df2 = pd.DataFrame(data2)

# Show both inputs, each followed by a blank line.
print(df1, df2, sep="\n\n", end="\n\n")

# Inner join on "fruits": only fruits present in both tables survive, and the
# remaining same-named columns are disambiguated with _x / _y suffixes.
df3 = df1.merge(df2, how="inner", on="fruits")
print(df3)





       fruits  year  price
0       apple  2001    150
1      orange  2002    120
2      banana  2001    100
3  strawberry  2008   2510
4        kiwi  2007   3000

       fruits  year  price
0       apple  2001    150
1      orange  2002    120
2      banana  2001    100
3  strawberry  2008   2510
4       mango  2007   3000

       fruits  year_x  price_x  year_y  price_y
0       apple    2001      150    2001      150
1      orange    2002      120    2002      120
2      banana    2001      100    2001      100
3  strawberry    2008     2510    2008     2510


In [23]:
# Orders reference customers through customer_id.
order_df = pd.DataFrame(
    [
        [1000, 2546, 103],
        [1001, 4352, 101],
        [1002, 342, 101],
    ],
    columns=["id", "item_id", "customer_id"],
)

customer_df = pd.DataFrame(
    [
        [101, "Tanaka"],
        [102, "Suzuki"],
        [103, "Kato"],
    ],
    columns=["id", "name"],
)

# The key columns have different names, so each side names its own key.
# Both frames also carry an "id" column, which comes out as id_x / id_y.
order_df = order_df.merge(
    customer_df,
    how="inner",
    left_on="customer_id",
    right_on="id",
)

print(order_df)

   id_x  item_id  customer_id  id_y    name
0  1000     2546          103   103    Kato
1  1001     4352          101   101  Tanaka
2  1002      342          101   101  Tanaka


In [24]:
# Label the customer rows with their ids so the join can match on the index.
customer_df.index = pd.Index([101, 102, 103])

# Match order_df.customer_id against the row labels of customer_df.
# NOTE(review): order_df is rebound to its own merge result, and at this point
# it already contains the customer columns from the earlier merge, so "name"
# comes out twice (name_x / name_y). Re-running this cell keeps adding columns;
# merging into a new variable from the un-merged orders would avoid that.
order_df = order_df.merge(
    customer_df,
    how="inner",
    left_on="customer_id",
    right_index=True,
)
print(order_df)

   id_x  item_id  customer_id  id_y  name_x   id  name_y
0  1000     2546          103   103    Kato  103    Kato
1  1001     4352          101   101  Tanaka  101  Tanaka
2  1002      342          101   101  Tanaka  101  Tanaka


In [33]:
np.random.seed(0)
columns = ["apple", "orange", "banana", "strawberry", "kiwi"]

# Ten draws from 1..10 per column, generated in column order, with the rows
# labelled 1..10.
df = pd.DataFrame(
    {column: np.random.choice(range(1, 11), 10) for column in columns},
    index=range(1, 11),
)

# First and last three rows of the frame.
df_head = df.head(3)
df_tail = df.tail(3)

print(df_head, df_tail, sep="\n")


   apple  orange  banana  strawberry  kiwi
1      6       8       6           3    10
2      1       7      10           4    10
3      4       9       9           9     1
    apple  orange  banana  strawberry  kiwi
8       6       8       4           8     8
9       3       9       6           1     3
10      5       2       1           2     1


In [34]:
# One row per prefecture: name, area, population and the region it belongs to.
prefecture_df = pd.DataFrame(
    [
        ["Tokyo", 2190, 13636, "Kanto"],
        ["Kanagawa", 2415, 9145, "Kanto"],
        ["Osaka", 1904, 8837, "Kinki"],
        ["Kyoto", 4610, 2605, "Kinki"],
        ["Aichi", 5172, 7505, "Chubu"],
    ],
    columns=["Prefecture", "Area", "Population", "Region"],
)
print(prefecture_df)
print()

# Group the rows by region; nothing is computed until an aggregation is called.
grouped_region = prefecture_df.groupby("Region")

# "Prefecture" holds strings and has no mean. Recent pandas raises TypeError
# when asked to average it, so restrict the aggregation to numeric columns.
mean_df = grouped_region.mean(numeric_only=True)

print(mean_df)

  Prefecture  Area  Population Region
0      Tokyo  2190       13636  Kanto
1   Kanagawa  2415        9145  Kanto
2      Osaka  1904        8837  Kinki
3      Kyoto  4610        2605  Kinki
4      Aichi  5172        7505  Chubu

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000162B2579B20>
