In [2]:
## DataFrame 연결
import numpy as np
import pandas as pd

def make_random_df(index, columns, seed):
    """Build a DataFrame of reproducible random integers between 1 and 100.

    Args:
        index: Row labels for the frame; must support ``len()``. One value is
            drawn per label for every column.
        columns: Iterable of column names. Columns are filled in the given
            order, so a given seed always produces the same values per column.
        seed: Seed for the random number generator.

    Returns:
        pandas.DataFrame labelled by ``index`` with one integer column per
        name in ``columns``. If ``columns`` is empty the frame keeps its row
        labels and simply has no columns.
    """
    # A private legacy RandomState produces the same stream as
    # np.random.seed(seed) followed by np.random.choice, but it does not
    # reseed the process-wide NumPy generator that other code may rely on.
    rng = np.random.RandomState(seed)
    n_rows = len(index)
    # Dict insertion order fixes both the column order and the draw order.
    values = {column: rng.choice(range(1, 101), n_rows) for column in columns}
    return pd.DataFrame(values, index=index)

# Two frames with identical row labels (1..4) and identical fruit columns;
# only the random seed differs between them.
columns = ['apple', 'orange', 'banana']
df_data1 = make_random_df(index=range(1, 5), columns=columns, seed=0)
df_data2 = make_random_df(index=range(1, 5), columns=columns, seed=1)

# Stacking along the rows keeps the three columns and repeats labels 1..4.
df1 = pd.concat((df_data1, df_data2), axis='index')
print(df1)

# Joining along the columns keeps the four rows and repeats the column names.
df2 = pd.concat((df_data1, df_data2), axis='columns')
print(df2)

   apple  orange  banana
1     45      68      37
2     48      10      88
3     65      84      71
4     68      22      89
1     38      76      17
2     13       6       2
3     73      80      77
4     10      65      72
   apple  orange  banana  apple  orange  banana
1     45      68      37     38      76      17
2     48      10      88     13       6       2
3     65      84      71     73      80      77
4     68      22      89     10      65      72


In [5]:
## 인덱스나 컬럼이 일치하지 않을 경우

## DataFrame 연결
import numpy as np
import pandas as pd

def make_random_df(index, columns, seed):
    """Create a DataFrame filled with seeded random integers from 1 to 100.

    Args:
        index: Row labels; must support ``len()``. Each column receives
            ``len(index)`` values.
        columns: Iterable of column names, filled in the order given.
        seed: Seed for the random number generator; equal seeds give equal
            frames.

    Returns:
        pandas.DataFrame whose rows are labelled by ``index`` and whose
        columns are the names in ``columns``. An empty ``columns`` yields a
        frame with the row labels and no columns.
    """
    # Use a dedicated legacy RandomState rather than np.random.seed(): the
    # drawn values are the same, but the shared global generator is not
    # reseeded as a side effect of calling this helper.
    rng = np.random.RandomState(seed)
    n_rows = len(index)
    # Dict insertion order fixes both the column order and the draw order.
    values = {column: rng.choice(range(1, 101), n_rows) for column in columns}
    return pd.DataFrame(values, index=index)

# The two frames overlap only partly: they share 'orange' and 'banana' as
# columns, and row labels 1 and 3.
columns1 = ['apple', 'orange', 'banana']
columns2 = ['orange', 'kiwifruit', 'banana']

df_data1 = make_random_df(index=range(1, 5), columns=columns1, seed=0)
df_data2 = make_random_df(index=range(1, 8, 2), columns=columns2, seed=1)

# Row-wise: columns are matched by name; cells with no source become NaN.
df1 = pd.concat((df_data1, df_data2), axis='index')
print(df1)

# Column-wise: rows are matched by label; cells with no source become NaN.
df2 = pd.concat((df_data1, df_data2), axis='columns')
print(df2)

   apple  orange  banana  kiwifruit
1   45.0      68      37        NaN
2   48.0      10      88        NaN
3   65.0      84      71        NaN
4   68.0      22      89        NaN
1    NaN      38      17       76.0
3    NaN      13       2        6.0
5    NaN      73      77       80.0
7    NaN      10      72       65.0
   apple  orange  banana  orange  kiwifruit  banana
1   45.0    68.0    37.0    38.0       76.0    17.0
2   48.0    10.0    88.0     NaN        NaN     NaN
3   65.0    84.0    71.0    13.0        6.0     2.0
4   68.0    22.0    89.0     NaN        NaN     NaN
5    NaN     NaN     NaN    73.0       80.0    77.0
7    NaN     NaN     NaN    10.0       65.0    72.0


In [6]:
## 라벨 지정

import numpy as np
import pandas as pd

def make_random_df(index, columns, seed):
    """Return a DataFrame of seeded random integers drawn from 1..100.

    Args:
        index: Row labels; must support ``len()``. ``len(index)`` values are
            drawn for every column.
        columns: Iterable of column names; values are drawn column by column
            in this order.
        seed: Seed for the random number generator.

    Returns:
        pandas.DataFrame with ``index`` as row labels and one integer column
        per entry of ``columns``. When ``columns`` is empty the result has
        the row labels but no columns.
    """
    # An isolated legacy RandomState gives the same numbers as seeding the
    # global generator with np.random.seed(seed), without changing global
    # random state for whatever runs after this call.
    rng = np.random.RandomState(seed)
    n_rows = len(index)
    # Dict insertion order fixes both the column order and the draw order.
    values = {column: rng.choice(range(1, 101), n_rows) for column in columns}
    return pd.DataFrame(values, index=index)

# Same partly overlapping frames as before: shared columns 'orange' and
# 'banana', shared row labels 1 and 3.
columns1 = ['apple', 'orange', 'banana']
columns2 = ['orange', 'kiwifruit', 'banana']

df_data1 = make_random_df(index=range(1, 5), columns=columns1, seed=0)
df_data2 = make_random_df(index=range(1, 8, 2), columns=columns2, seed=1)

# keys adds an outer column level ('X' for the first frame, 'Y' for the
# second), so the repeated column names stay distinguishable.
df1 = pd.concat((df_data1, df_data2), keys=['X', 'Y'], axis='columns')
print(df1)

      X                    Y                 
  apple orange banana orange kiwifruit banana
1  45.0   68.0   37.0   38.0      76.0   17.0
2  48.0   10.0   88.0    NaN       NaN    NaN
3  65.0   84.0   71.0   13.0       6.0    2.0
4  68.0   22.0   89.0    NaN       NaN    NaN
5   NaN    NaN    NaN   73.0      80.0   77.0
7   NaN    NaN    NaN   10.0      65.0   72.0


In [10]:
# Select a single column of the two-level column index by its full
# (outer label, inner label) key; the result is a Series named by that key.
Y_banana = df1.loc[:, ('Y', 'banana')]
Y_banana

1    17.0
2     NaN
3     2.0
4     NaN
5    77.0
7    72.0
Name: (Y, banana), dtype: float64

In [11]:
# Left table: quantity per fruit.
data1 = dict(
    fruits=['apple', 'orange', 'banana', 'strawberry', 'kiwifruit'],
    year=[2001, 2002, 2001, 2008, 2006],
    amount=[1, 4, 5, 6, 3],
)
df1 = pd.DataFrame(data1)

# Right table: price per fruit.
data2 = dict(
    fruits=['apple', 'orange', 'banana', 'strawberry', 'mango'],
    year=[2001, 2002, 2001, 2008, 2007],
    price=[150, 120, 100, 250, 3000],
)
df2 = pd.DataFrame(data2)

# Inner join on 'fruits': only fruits present in both tables are kept.
# 'year' is not a join key, so it appears twice, as year_x and year_y.
df3 = df1.merge(df2, how='inner', on='fruits')
df3

Unnamed: 0,fruits,year_x,amount,year_y,price
0,apple,2001,1,2001,150
1,orange,2002,4,2002,120
2,banana,2001,5,2001,100
3,strawberry,2008,6,2008,250


In [12]:
# Left table: quantity per fruit.
data1 = dict(
    fruits=['apple', 'orange', 'banana', 'strawberry', 'kiwifruit'],
    year=[2001, 2002, 2001, 2008, 2006],
    amount=[1, 4, 5, 6, 3],
)
df1 = pd.DataFrame(data1)

# Right table: price per fruit.
data2 = dict(
    fruits=['apple', 'orange', 'banana', 'strawberry', 'mango'],
    year=[2001, 2002, 2001, 2008, 2007],
    price=[150, 120, 100, 250, 3000],
)
df2 = pd.DataFrame(data2)

# Outer join on 'fruits': fruits from either table are kept, and the columns
# that come from the table lacking that fruit are filled with NaN.
df3 = df1.merge(df2, how='outer', on='fruits')
df3

Unnamed: 0,fruits,year_x,amount,year_y,price
0,apple,2001.0,1.0,2001.0,150.0
1,orange,2002.0,4.0,2002.0,120.0
2,banana,2001.0,5.0,2001.0,100.0
3,strawberry,2008.0,6.0,2008.0,250.0
4,kiwifruit,2006.0,3.0,,
5,mango,,,2007.0,3000.0


In [17]:
# A 4-row frame with five fruit columns, generated from seed 0.
columns = ['apple', 'orange', 'banana', 'strawberry', 'kiwifruit']
df = make_random_df(index=range(1, 5), columns=columns, seed=0)

# From the describe() summary table, keep only the mean, max and min rows,
# in that order.
df_des = df.describe().loc[['mean', 'max', 'min'], :]
df_des

Unnamed: 0,apple,orange,banana,strawberry,kiwifruit
mean,56.5,46.0,71.25,56.75,66.0
max,68.0,84.0,89.0,89.0,89.0
min,45.0,10.0,37.0,13.0,40.0


In [19]:
# Show the whole frame (4 rows x 5 fruit columns) as the cell output.
df

Unnamed: 0,apple,orange,banana,strawberry,kiwifruit
1,45,68,37,89,40
2,48,10,88,13,88
3,65,84,71,59,47
4,68,22,89,66,89


In [22]:
# A period of -1 subtracts the next row from each row, column by column.
# The last row has no successor, so it comes out as NaN.
df_diff = df.diff(periods=-1)
df_diff

Unnamed: 0,apple,orange,banana,strawberry,kiwifruit
1,-3.0,58.0,-51.0,76.0,-48.0
2,-17.0,-74.0,17.0,-46.0,41.0
3,-3.0,62.0,-18.0,-7.0,-42.0
4,,,,,


In [24]:
# Five places, given column by column: name, area, population and the
# region each one belongs to.
prefecture_df = pd.DataFrame({
    'Prefecture': ['강릉', '광주', '평창', '대전', '단양'],
    'Area': [1040, 430, 1463, 539, 780],
    'Population': [213527, 1458915, 42218, 1476955, 29816],
    'Region': ['강원도', '전라도', '강원도', '충청도', '충청도'],
})

In [25]:
# Show the frame as the cell output to check its rows and columns.
prefecture_df

Unnamed: 0,Prefecture,Area,Population,Region
0,강릉,1040,213527,강원도
1,광주,430,1458915,전라도
2,평창,1463,42218,강원도
3,대전,539,1476955,충청도
4,단양,780,29816,충청도


In [27]:
# Average the numeric columns (Area, Population) within each Region.
# numeric_only=True leaves out the text column 'Prefecture'. pandas 2.0 and
# later no longer drop such columns silently, so a bare mean() raises
# TypeError there.
grouped_region = prefecture_df.groupby('Region')
mean_df = grouped_region.mean(numeric_only=True)
mean_df

Unnamed: 0_level_0,Area,Population
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
강원도,1251.5,127872.5
전라도,430.0,1458915.0
충청도,659.5,753385.5
