In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import lightgbm as lgb
import bisect
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from pathlib import Path

In [None]:
# from pycaret.regression import *
import random
import torch
import os
# Fix every random seed used by the notebook
SEED = 42


def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random generators.

    Args:
        seed: Integer seed applied to every generator.

    Side effects:
        Sets cuDNN flags on ``torch.backends`` and writes the
        ``PYTHONHASHSEED`` / ``TF_ENABLE_ONEDNN_OPTS`` environment variables.
    """
    random.seed(seed)
    np.random.seed(seed)
    # tf.random.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Turning the autotuner off is not enough on its own: also request
    # deterministic cuDNN kernels so GPU runs are repeatable.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # Strict mode stays off so ops without a deterministic implementation do not raise.
    torch.use_deterministic_algorithms(False) # True
    # NOTE: hash randomisation is fixed at interpreter start-up, so this value
    # only reaches subprocesses launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'


seed_everything(SEED)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Data Load

In [None]:
# Load the train/test tables from the mounted Google Drive.
# Per the previews below: train has gid, DATE, TIME, RIDE_DEMAND, ALIGHT_DEMAND;
# test has the same columns except RIDE_DEMAND.
train = pd.read_csv('/content/drive/MyDrive/Machine_Learnig_Project/train_data_modified.csv')
test = pd.read_csv('/content/drive/MyDrive/Machine_Learnig_Project/test_data_modified.csv')

In [None]:
train[train.columns[train.isnull().any()]].shape

(539490, 0)

In [None]:
train.head()

Unnamed: 0,gid,DATE,TIME,RIDE_DEMAND,ALIGHT_DEMAND
0,다마9599,2023-06-01,5,1,0
1,다마9599,2023-06-01,6,2,5
2,다마9599,2023-06-01,7,2,7
3,다마9599,2023-06-01,8,3,12
4,다마9599,2023-06-01,9,2,31


In [None]:
test.head()

Unnamed: 0,gid,DATE,TIME,ALIGHT_DEMAND
0,다마9599,2023-06-24,5,0
1,다마9599,2023-06-24,6,1
2,다마9599,2023-06-24,7,0
3,다마9599,2023-06-24,8,1
4,다마9599,2023-06-24,9,5


In [None]:
test

Unnamed: 0,gid,DATE,TIME,ALIGHT_DEMAND
0,다마9599,2023-06-24,5,0
1,다마9599,2023-06-24,6,1
2,다마9599,2023-06-24,7,0
3,다마9599,2023-06-24,8,1
4,다마9599,2023-06-24,9,5
...,...,...,...,...
161842,라바0421,2023-08-31,21,1
161843,라바0421,2023-08-31,22,0
161844,라바0421,2023-08-31,23,0
161845,라바0421,2023-08-31,0,0


In [None]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539490 entries, 0 to 539489
Data columns (total 5 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   gid            539490 non-null  object
 1   DATE           539490 non-null  object
 2   TIME           539490 non-null  int64 
 3   RIDE_DEMAND    539490 non-null  int64 
 4   ALIGHT_DEMAND  539490 non-null  int64 
dtypes: int64(3), object(2)
memory usage: 20.6+ MB


In [None]:
train.describe()

Unnamed: 0,TIME,RIDE_DEMAND,ALIGHT_DEMAND
count,539490.0,539490.0,539490.0
mean,12.714286,39.199524,39.179217
std,6.547582,106.844929,112.895851
min,0.0,0.0,0.0
25%,8.0,0.0,0.0
50%,13.0,1.0,1.0
75%,18.0,28.0,26.0
max,23.0,2485.0,3121.0


## Data Preprocessing (파생변수 생성)

In [None]:
# Parse DATE, derive calendar features from it, then discard the raw column.
for df in (train, test):
    df['DATE'] = pd.to_datetime(df['DATE'])
    date_parts = df['DATE'].dt
    # Year extraction is left disabled.
    # df['year'] = date_parts.year
    df['month'] = date_parts.month
    df['day'] = date_parts.day
    df['weekday'] = date_parts.weekday  # pandas convention: Monday=0 ... Sunday=6
    # The raw datetime column is not used as a feature.
    df.drop(columns='DATE', inplace=True)

In [None]:
## Dawn, morning, daytime, evening, night
## (the original note says the bands follow legal / traditional seasonal time divisions)
def Get_TimeOfDay(hour):
    """Map an hour of the day to a time-band label.

    Bands: '1' for [0, 5), '2' for [5, 9), '3' for [9, 17), '4' for [17, 21),
    and '5' for everything else (21 and later, or any negative value).

    Args:
        hour: Hour value; callers in this notebook pass ``TIME % 24``.

    Returns:
        The band label as a one-character string.
    """
    # (exclusive upper bound, label) for each band that starts at or after 0
    bands = ((5, '1'), (9, '2'), (17, '3'), (21, '4'))
    if hour >= 0:
        for upper, label in bands:
            if hour < upper:
                return label
    return '5'

# Wrap TIME into 0-23 with modulo 24, then look up the band label for every row.
train['TimeOfDay'] = (train['TIME'] % 24).map(Get_TimeOfDay)
test['TimeOfDay'] = (test['TIME'] % 24).map(Get_TimeOfDay)

In [None]:
train.groupby(['gid', 'month', 'day'])['ALIGHT_DEMAND'].sum().reset_index()

Unnamed: 0,gid,month,day,ALIGHT_DEMAND
0,다마9599,6,1,258
1,다마9599,6,2,175
2,다마9599,6,3,152
3,다마9599,6,4,310
4,다마9599,6,5,359
...,...,...,...,...
25685,라바0421,8,20,4
25686,라바0421,8,21,7
25687,라바0421,8,22,5
25688,라바0421,8,23,6


In [None]:
# Total alighting per gid and calendar day (month + day)
grid_daily_alighting = train.groupby(['gid', 'month', 'day'], as_index=False)['ALIGHT_DEMAND'].sum()

# Total alighting per gid and month
grid_monthly_alighting = train.groupby(['gid', 'month'], as_index=False)['ALIGHT_DEMAND'].sum()

In [None]:
# Alight ratio: alighting demand relative to boarding demand.
# The +1 in the denominator avoids division by zero when RIDE_DEMAND is 0.
train['alight_ratio'] = train['ALIGHT_DEMAND'].div(train['RIDE_DEMAND'].add(1))

In [None]:
## Daily alight ratio: mean hourly ratio per gid and calendar day ...
daily_alight_ratio = train.groupby(['gid', 'month', 'day'], as_index=False)['alight_ratio'].mean()
## ... then averaged over all days to get one value per gid
daily_ratio_gid_mean = daily_alight_ratio.groupby('gid', as_index=False)['alight_ratio'].mean()

In [None]:
daily_alight_ratio

Unnamed: 0,gid,month,day,alight_ratio
0,다마9599,6,1,1.665215
1,다마9599,6,2,1.338851
2,다마9599,6,3,1.620039
3,다마9599,6,4,7.307710
4,다마9599,6,5,2.238483
...,...,...,...,...
25685,라바0421,8,20,0.166667
25686,라바0421,8,21,0.142857
25687,라바0421,8,22,0.206349
25688,라바0421,8,23,0.253968


In [None]:
## Monthly alight ratio: mean of the row-level ratio per gid and month
monthly_alight_ratio = train.groupby(['gid', 'month'], as_index=False)['alight_ratio'].mean()

In [None]:
## Attach the monthly and daily alighting totals to every train row
monthly_totals = grid_monthly_alighting.rename(columns={'ALIGHT_DEMAND': 'Month_ALIGHT'})
train = train.merge(monthly_totals, on=['gid', 'month'], how='left')
daily_totals = grid_daily_alighting.rename(columns={'ALIGHT_DEMAND': 'DAY_ALIGHT'})
train = train.merge(daily_totals, on=['gid', 'month', 'day'], how='left')

In [None]:
## Attach the per-(gid, month) ratio and the per-gid mean daily ratio to every train row
train = train.merge(
    monthly_alight_ratio.rename(columns={'alight_ratio': 'monthly_ratio'}),
    on=['gid', 'month'],
    how='left',
)
train = train.merge(
    daily_ratio_gid_mean.rename(columns={'alight_ratio': 'daily_ratio_mean'}),
    on=['gid'],
    how='left',
)

In [None]:
# Daily average alighting
# NOTE(review): the divisor is the number of DISTINCT day-of-month values seen for the gid
# in this frame, not the number of hourly rows in the day. Train and test cover a different
# number of days, so the same feature is scaled differently in each frame (the previews show
# 258 -> 10.75 in train but 85 -> 10.625 in test). Confirm this is intended.
daily_alighting = train.groupby(['gid', 'month', 'day'], as_index=False)['ALIGHT_DEMAND'].sum()
daily_alighting['daily_avg_al'] = daily_alighting['ALIGHT_DEMAND'].div(
    daily_alighting.groupby('gid')['day'].transform('nunique')
)
train = train.merge(
    daily_alighting[['gid', 'month', 'day', 'daily_avg_al']],
    on=['gid', 'month', 'day'],
    how='left',
)

In [None]:
# Alighting moving average over the current row and up to two preceding rows of the same gid.
# Grouping is by gid only, so the window runs across day boundaries.
# Assumes rows are in chronological order within each gid -- TODO confirm.
window = 3
train['al_mov_avg'] = (
    train.groupby('gid')['ALIGHT_DEMAND']
    .transform(lambda demand: demand.rolling(window=window, min_periods=1).mean())
)

In [None]:
train.columns

Index(['gid', 'TIME', 'RIDE_DEMAND', 'ALIGHT_DEMAND', 'month', 'day',
       'weekday', 'TimeOfDay', 'alight_ratio', 'Month_ALIGHT', 'DAY_ALIGHT',
       'monthly_ratio', 'daily_ratio_mean', 'daily_avg_al', 'al_mov_avg'],
      dtype='object')

## Test data에도 똑같이 변수 생성

In [None]:
# Total alighting per gid and calendar day, computed on the test frame
grid_daily_alighting = test.groupby(['gid', 'month', 'day'], as_index=False)['ALIGHT_DEMAND'].sum()

# Total alighting per gid and month, computed on the test frame
grid_monthly_alighting = test.groupby(['gid', 'month'], as_index=False)['ALIGHT_DEMAND'].sum()

In [None]:
## Attach the monthly and daily alighting totals to every test row
monthly_totals = grid_monthly_alighting.rename(columns={'ALIGHT_DEMAND': 'Month_ALIGHT'})
test = test.merge(monthly_totals, on=['gid', 'month'], how='left')
daily_totals = grid_daily_alighting.rename(columns={'ALIGHT_DEMAND': 'DAY_ALIGHT'})
test = test.merge(daily_totals, on=['gid', 'month', 'day'], how='left')

In [None]:
# Add the ratio features to the test set.
# Both lookup tables were computed from the training frame (test has no RIDE_DEMAND),
# so any gid or (gid, month) missing from train is left as NaN by the left join.
test = test.merge(
    monthly_alight_ratio.rename(columns={'alight_ratio': 'monthly_ratio'}),
    on=['gid', 'month'],
    how='left',
)
test = test.merge(
    daily_ratio_gid_mean.rename(columns={'alight_ratio': 'daily_ratio_mean'}),
    on='gid',
    how='left',
)

In [None]:
# Daily average alighting (test frame)
# NOTE(review): the divisor is the number of DISTINCT day-of-month values seen for the gid
# in this frame. The previews show 85 -> 10.625 here but 258 -> 10.75 in train, so the
# divisor differs between the two frames. Confirm the feature is meant to be scaled this way.
daily_alighting = test.groupby(['gid', 'month', 'day'], as_index=False)['ALIGHT_DEMAND'].sum()
daily_alighting['daily_avg_al'] = daily_alighting['ALIGHT_DEMAND'].div(
    daily_alighting.groupby('gid')['day'].transform('nunique')
)
test = test.merge(
    daily_alighting[['gid', 'month', 'day', 'daily_avg_al']],
    on=['gid', 'month', 'day'],
    how='left',
)

In [None]:
# Alighting moving average over the current row and up to two preceding rows of the same gid.
# Grouping is by gid only, so the window runs across day boundaries.
# Assumes rows are in chronological order within each gid -- TODO confirm.
window = 3
test['al_mov_avg'] = (
    test.groupby('gid')['ALIGHT_DEMAND']
    .transform(lambda demand: demand.rolling(window=window, min_periods=1).mean())
)

In [None]:
test.columns

Index(['gid', 'TIME', 'ALIGHT_DEMAND', 'month', 'day', 'weekday', 'TimeOfDay',
       'Month_ALIGHT', 'DAY_ALIGHT', 'monthly_ratio', 'daily_ratio_mean',
       'daily_avg_al', 'al_mov_avg'],
      dtype='object')

### `범주형` 변수들 인코딩

In [None]:
# Cast the 'TimeOfDay' label column to pandas' categorical dtype in both frames.
# Each frame infers its own category set from the labels it contains.
for frame in (train, test):
    frame['TimeOfDay'] = frame['TimeOfDay'].astype('category')

### GID를 기준으로 인구 데이터 추가

In [None]:
import geopandas as gpd

# Load the grid shapefile (per the preview: columns gid, lbl, val and geometry)
shapefile_path = '/content/drive/MyDrive/Machine_Learnig_Project/nlsp_020001001.shp'
gdf = gpd.read_file(shapefile_path, encoding = 'UTF-8')

# Inspect the data
print(gdf.head())
# info() writes its report itself and returns None; wrapping it in print()
# appended a stray "None" line to the output.
gdf.info()

      gid       lbl      val  \
0  다바8516  16173.00  16173.0   
1  라바0120     31.00     31.0   
2  다바9614  14729.00  14729.0   
3  다바8728       N/A      0.0   
4  다바9716     79.00     79.0   

                                            geometry  
0  POLYGON ((985000.000 1816000.000, 985000.000 1...  
1  POLYGON ((1001000.000 1820000.000, 1001000.000...  
2  POLYGON ((996000.000 1814000.000, 996000.000 1...  
3  POLYGON ((987000.000 1828000.000, 987000.000 1...  
4  POLYGON ((997000.000 1816000.000, 997000.000 1...  
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 632 entries, 0 to 631
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   gid       632 non-null    object  
 1   lbl       474 non-null    object  
 2   val       474 non-null    float64 
 3   geometry  632 non-null    geometry
dtypes: float64(1), geometry(1), object(2)
memory usage: 19.9+ KB
None


In [None]:
## Normalise the key column name to lowercase 'gid'.
## The info() output above already shows a 'gid' column, so this is a no-op for this file.
gdf = gdf.rename(columns={'GID': 'gid'})

In [None]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 632 entries, 0 to 631
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   gid       632 non-null    object  
 1   lbl       474 non-null    object  
 2   val       474 non-null    float64 
 3   geometry  632 non-null    geometry
dtypes: float64(1), geometry(1), object(2)
memory usage: 19.9+ KB


In [None]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 539490 entries, 0 to 539489
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype   
---  ------            --------------   -----   
 0   gid               539490 non-null  object  
 1   TIME              539490 non-null  int64   
 2   RIDE_DEMAND       539490 non-null  int64   
 3   ALIGHT_DEMAND     539490 non-null  int64   
 4   month             539490 non-null  int64   
 5   day               539490 non-null  int64   
 6   weekday           539490 non-null  int64   
 7   TimeOfDay         539490 non-null  category
 8   alight_ratio      539490 non-null  float64 
 9   Month_ALIGHT      539490 non-null  int64   
 10  DAY_ALIGHT        539490 non-null  int64   
 11  monthly_ratio     539490 non-null  float64 
 12  daily_ratio_mean  539490 non-null  float64 
 13  daily_avg_al      539490 non-null  float64 
 14  al_mov_avg        539490 non-null  float64 
dtypes: category(1), float64(5), int64(8), object(1)
mem

In [None]:
# Left-join the grid attributes (lbl, val, geometry) onto the training rows via 'gid'
merged_data = pd.merge(train, gdf, how='left', on='gid')

# Check the result
print(merged_data.head())

      gid  TIME  RIDE_DEMAND  ALIGHT_DEMAND  month  day  weekday TimeOfDay  \
0  다마9599     5            1              0      6    1        3         2   
1  다마9599     6            2              5      6    1        3         2   
2  다마9599     7            2              7      6    1        3         2   
3  다마9599     8            3             12      6    1        3         2   
4  다마9599     9            2             31      6    1        3         3   

   alight_ratio  Month_ALIGHT  DAY_ALIGHT  monthly_ratio  daily_ratio_mean  \
0      0.000000          5201         258       2.193231          1.613118   
1      1.666667          5201         258       2.193231          1.613118   
2      2.333333          5201         258       2.193231          1.613118   
3      3.000000          5201         258       2.193231          1.613118   
4     10.333333          5201         258       2.193231          1.613118   

   daily_avg_al  al_mov_avg   lbl  val  \
0         10.75    0

In [None]:
# Replace None/NaN in 'val' with 0.
# Assign the result back: calling fillna(inplace=True) on a selected column is chained
# mutation, which is deprecated in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_data['val'] = merged_data['val'].fillna(0)

In [None]:
merged_data.head()

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,lbl,val,geometry
0,다마9599,5,1,0,6,1,3,2,0.0,5201,258,2.193231,1.613118,10.75,0.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.75,2.5,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.75,4.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
3,다마9599,8,3,12,6,1,3,2,3.0,5201,258,2.193231,1.613118,10.75,8.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.75,16.666667,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."


### GID를 기준으로 인구 데이터 추가 (Test set)

In [None]:
# Left-join the grid attributes (lbl, val, geometry) onto the test rows via 'gid'
merged_test = pd.merge(test, gdf, how='left', on='gid')

# Check the result
print(merged_test.head())

      gid  TIME  ALIGHT_DEMAND  month  day  weekday TimeOfDay  Month_ALIGHT  \
0  다마9599     5              0      6   24        5         2           468   
1  다마9599     6              1      6   24        5         2           468   
2  다마9599     7              0      6   24        5         2           468   
3  다마9599     8              1      6   24        5         2           468   
4  다마9599     9              5      6   24        5         3           468   

   DAY_ALIGHT  monthly_ratio  daily_ratio_mean  daily_avg_al  al_mov_avg  \
0          85       2.193231          1.613118        10.625    0.000000   
1          85       2.193231          1.613118        10.625    0.500000   
2          85       2.193231          1.613118        10.625    0.333333   
3          85       2.193231          1.613118        10.625    0.666667   
4          85       2.193231          1.613118        10.625    2.000000   

    lbl  val                                           geometry  
0 

In [None]:
# Replace None/NaN in 'val' with 0.
# Assign the result back: calling fillna(inplace=True) on a selected column is chained
# mutation, which is deprecated in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_test['val'] = merged_test['val'].fillna(0)

### 기존 Weekday 변수에서 새로운 값 생성

In [None]:
# Give public holidays their own weekday code: June 6 (Memorial Day) and
# August 15 (Liberation Day) are relabelled from 0-6 to HOLIDAY_WEEKDAY.
# The override is applied to the test frame as well, so that 'weekday' is encoded
# identically in both frames should a holiday date appear in test.
HOLIDAYS = [(6, 6), (8, 15)]  # (month, day)
HOLIDAY_WEEKDAY = 7
for frame in (merged_data, merged_test):
    is_holiday = pd.Series(False, index=frame.index)
    for hol_month, hol_day in HOLIDAYS:
        is_holiday |= (frame['month'] == hol_month) & (frame['day'] == hol_day)
    frame.loc[is_holiday, 'weekday'] = HOLIDAY_WEEKDAY

In [None]:
merged_data.loc[((merged_data['month'] == 6) & (merged_data['day'] == 6)) | ((merged_data['month'] == 8) & (merged_data['day'] == 15))]

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,lbl,val,geometry
105,다마9599,5,4,0,6,6,7,2,0.000000,5201,250,2.193231,1.613118,10.416667,0.000000,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
106,다마9599,6,5,7,6,6,7,2,1.166667,5201,250,2.193231,1.613118,10.416667,2.333333,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
107,다마9599,7,5,3,6,6,7,2,0.500000,5201,250,2.193231,1.613118,10.416667,3.333333,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
108,다마9599,8,9,2,6,6,7,2,0.200000,5201,250,2.193231,1.613118,10.416667,4.000000,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
109,다마9599,9,7,21,6,6,7,3,2.625000,5201,250,2.193231,1.613118,10.416667,8.666667,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539296,라바0421,21,0,0,8,15,7,5,0.000000,95,2,0.142030,0.141839,0.083333,0.000000,,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539297,라바0421,22,0,0,8,15,7,5,0.000000,95,2,0.142030,0.141839,0.083333,0.000000,,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539298,라바0421,23,0,0,8,15,7,5,0.000000,95,2,0.142030,0.141839,0.083333,0.000000,,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539299,라바0421,0,0,0,8,15,7,1,0.000000,95,2,0.142030,0.141839,0.083333,0.000000,,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."


In [None]:
merged_data.head()

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,lbl,val,geometry
0,다마9599,5,1,0,6,1,3,2,0.0,5201,258,2.193231,1.613118,10.75,0.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.75,2.5,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.75,4.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
3,다마9599,8,3,12,6,1,3,2,3.0,5201,258,2.193231,1.613118,10.75,8.0,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.75,16.666667,,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."


## 중복 열 제거

In [None]:
# 'lbl' is the text form of 'val' in the shapefile preview, so only 'val' is kept
merged_data = merged_data.drop(columns='lbl')

In [None]:
# 'lbl' is the text form of 'val' in the shapefile preview, so only 'val' is kept
merged_test = merged_test.drop(columns='lbl')

In [None]:
merged_data

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry
0,다마9599,5,1,0,6,1,3,2,0.000000,5201,258,2.193231,1.613118,10.750,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.750,2.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.750,4.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
3,다마9599,8,3,12,6,1,3,2,3.000000,5201,258,2.193231,1.613118,10.750,8.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.750,16.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539485,라바0421,21,0,1,8,24,3,5,1.000000,95,3,0.142030,0.141839,0.125,1.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539486,라바0421,22,0,0,8,24,3,5,0.000000,95,3,0.142030,0.141839,0.125,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539487,라바0421,23,0,0,8,24,3,5,0.000000,95,3,0.142030,0.141839,0.125,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
539488,라바0421,0,0,0,8,24,3,1,0.000000,95,3,0.142030,0.141839,0.125,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."


In [None]:
merged_test

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161842,라바0421,21,1,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
161843,라바0421,22,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
161844,라바0421,23,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."
161845,라바0421,0,0,8,31,3,1,21,2,0.142030,0.141839,0.250,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000..."


## gid별 대전 의원 개수 추가

In [None]:
# Load the clinic table; per the preview it holds one row per medical facility,
# each tagged with the gid of the grid cell it belongs to.
sh = pd.read_csv('/content/drive/MyDrive/Machine_Learnig_Project/sh_data.csv')
sh.head()

Unnamed: 0,gid,lbl,val,geometry,index_right,의료기관명,의료기관주소,시군구,읍면동,개수_x
0,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,62.0,하얀이치과의원,"대전광역시 유성구 도안대로 511-13, 3층 (상대동)",유성구,상대동,13.0
1,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,59.0,도안한의원,"대전광역시 유성구 도안대로 511-13, 202호 (상대동)",유성구,상대동,13.0
2,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,64.0,에이케이플란트치과의원,"대전광역시 유성구 도안대로 511-7, 4층 (상대동)",유성구,상대동,13.0
3,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,56.0,고려정형외과의원,"대전광역시 유성구 도안대로 511-7, 춘추빌딩동 3층 (상대동)",유성구,상대동,13.0
4,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,63.0,지성연합치과의원,"대전광역시 유성구 도안대로 511-6 (상대동,3층)",유성구,상대동,13.0


In [None]:
# Count the rows of `sh` per gid and store the count in 'sh_count'
grouped = sh.groupby('gid').size().rename('sh_count').reset_index()

In [None]:
# Keep only 'gid' and 'sh_count' (`grouped` has exactly these columns, so this is a copy)
new_sh = grouped.loc[:, ['gid', 'sh_count']]

In [None]:
# Preview the first rows; without the call parentheses the cell displayed the bound method
new_sh.head()

<bound method NDFrame.head of         gid  sh_count
0    다바8102         1
1    다바8116         1
2    다바8215         1
3    다바8216         6
4    다바8220         2
..      ...       ...
137  다바9613        32
138  다바9614        15
139  다바9709         6
140  다바9713         2
141  다바9809         2

[142 rows x 2 columns]>

In [None]:
# Left-join the per-gid clinic counts onto the training frame
merged_data = merged_data.merge(new_sh, how='left', on='gid')

In [None]:
# Left-join the per-gid clinic counts onto the test frame
merged_test = merged_test.merge(new_sh, how='left', on='gid')

In [None]:
# gids with no row in the clinic table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_data['sh_count'] = merged_data['sh_count'].fillna(0)
merged_data.head()

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count
0,다마9599,5,1,0,6,1,3,2,0.0,5201,258,2.193231,1.613118,10.75,0.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.75,2.5,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.75,4.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
3,다마9599,8,3,12,6,1,3,2,3.0,5201,258,2.193231,1.613118,10.75,8.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.75,16.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0


In [None]:
merged_test

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161842,라바0421,21,1,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",
161843,라바0421,22,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",
161844,라바0421,23,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",
161845,라바0421,0,0,8,31,3,1,21,2,0.142030,0.141839,0.250,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",


In [None]:
# gids with no row in the clinic table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_test['sh_count'] = merged_test['sh_count'].fillna(0)
merged_test

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161842,라바0421,21,1,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0
161843,라바0421,22,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0
161844,라바0421,23,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0
161845,라바0421,0,0,8,31,3,1,21,2,0.142030,0.141839,0.250,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0


## gid별 대전 병원 개수 추가

In [None]:
# Load the hospital table; per the preview it holds one row per medical facility,
# each tagged with the gid of the grid cell it belongs to.
bh = pd.read_csv('/content/drive/MyDrive/Machine_Learnig_Project/bh_data.csv')
bh.head()

Unnamed: 0,gid,lbl,val,geometry,index_right,의료기관명,의료기관주소,시군구,읍면동,개수
0,다바9503,249.0,249.0,POLYGON ((127.44436522938219 36.22417844193214...,64.0,보광노인전문병원,대전광역시 동구 산내로560번길 18-11 (상소동),동구,상소동,1.0
1,다바9428,3804.0,3804.0,POLYGON ((127.43304566943395 36.44955849281782...,40.0,이엘치과병원,"대전광역시 대덕구 신탄진로 782 (신탄진동, 1,2,3,4,5층)",대덕구,신탄진동,2.0
2,다바9116,6374.0,6374.0,POLYGON ((127.39970762397725 36.34135102099844...,36.0,평화요양병원,대전광역시 중구 대전천서로 745 (중촌동),중구,중촌동,2.0
3,다바9612,7385.0,7385.0,POLYGON ((127.45544611742866 36.30532297678221...,51.0,새봄요양병원,대전광역시 동구 신기로 123-20 (가오동),동구,가오동,1.0
4,다바9613,15255.0,15255.0,POLYGON ((127.45544098797333 36.31433845076714...,54.0,대전그린의료소비자생활협동조합 그린요양병원,대전광역시 동구 옥천로176번길 15-4 (판암동),동구,판암동,1.0


In [None]:
# Count the rows of `bh` per gid and store the count in 'bh_count'
grouped = bh.groupby('gid').size().rename('bh_count').reset_index()

In [None]:
# Keep only 'gid' and 'bh_count' (`grouped` has exactly these columns, so this is a copy)
new_bh = grouped.loc[:, ['gid', 'bh_count']]

In [None]:
new_bh.head()

Unnamed: 0,gid,bh_count
0,다바8411,1
1,다바8515,1
2,다바8517,3
3,다바8612,1
4,다바8617,7


In [None]:
# Left-join the per-gid hospital counts onto the training frame
merged_data = merged_data.merge(new_bh, how='left', on='gid')

In [None]:
# Left-join the per-gid hospital counts onto the test frame
merged_test = merged_test.merge(new_bh, how='left', on='gid')

In [None]:
# gids with no row in the hospital table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_data['bh_count'] = merged_data['bh_count'].fillna(0)
merged_data.head()

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count
0,다마9599,5,1,0,6,1,3,2,0.0,5201,258,2.193231,1.613118,10.75,0.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.75,2.5,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.75,4.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
3,다마9599,8,3,12,6,1,3,2,3.0,5201,258,2.193231,1.613118,10.75,8.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.75,16.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0


In [None]:
# gids with no row in the hospital table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_test['bh_count'] = merged_test['bh_count'].fillna(0)
merged_test.head()

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.5,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0


## gid별 대전 버스 정류장 개수 추가

In [None]:
# Load the bus-stop table; per the preview it holds one row per stop,
# each tagged with the gid of the grid cell it belongs to.
bus = pd.read_csv('/content/drive/MyDrive/Machine_Learnig_Project/bus_data.csv')
bus.head()

Unnamed: 0,gid,lbl,val,geometry,index_right,지형지물부호,관리번호,행정읍면동,도엽번호,도로구간번호,공사번호,정류장종류,정류장명,정류장유형,대장초기화여부,위도,경도
0,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,1124.0,정류장,2209001000.0,대전광역시 유성구,36710064.0,31544.0,STT1299000,마을버스,도안 한라비발디3단지,유개형(도시형),1.0,36.342867,127.333976
1,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,1123.0,정류장,2209001000.0,대전광역시 유성구,36710064.0,31544.0,STT1298000,마을버스,한라비발디아파트,유개형(도시형),1.0,36.343086,127.333513
2,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,1122.0,정류장,2209001000.0,대전광역시 유성구,36710064.0,31838.0,STT1297000,마을버스,유성구마을버스,유개형(도시형),1.0,36.345661,127.335504
3,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,937.0,정류장,2209001000.0,대전광역시 유성구,36710064.0,31563.0,STT1081000,시내버스,트리플시티9단지,유개형(도시형),1.0,36.341452,127.338029
4,다바8516,16173.0,16173.0,POLYGON ((127.33284624543423 36.34127620777740...,1151.0,정류장,2209001000.0,대전광역시 유성구,36710064.0,31563.0,STT1331000,버스공용,트리풀시티9단지,유개형(도시형),1.0,36.341497,127.338713


In [None]:
# Count the rows of `bus` per gid and store the count in 'bus_st_count'
grouped = bus.groupby('gid').size().rename('bus_st_count').reset_index()

In [None]:
# Keep only 'gid' and 'bus_st_count' (`grouped` has exactly these columns, so this is a copy)
new_bus = grouped.loc[:, ['gid', 'bus_st_count']]

In [None]:
new_bus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   gid           201 non-null    object
 1   bus_st_count  201 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 3.3+ KB


In [None]:
# Left-join the per-gid bus-stop counts onto the training frame
merged_data = merged_data.merge(new_bus, how='left', on='gid')

In [None]:
# Left-join the per-gid bus-stop counts onto the test frame
merged_test = merged_test.merge(new_bus, how='left', on='gid')

In [None]:
# gids with no row in the bus-stop table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_data['bus_st_count'] = merged_data['bus_st_count'].fillna(0)
merged_data.head()

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,alight_ratio,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count,bus_st_count
0,다마9599,5,1,0,6,1,3,2,0.0,5201,258,2.193231,1.613118,10.75,0.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
1,다마9599,6,2,5,6,1,3,2,1.666667,5201,258,2.193231,1.613118,10.75,2.5,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
2,다마9599,7,2,7,6,1,3,2,2.333333,5201,258,2.193231,1.613118,10.75,4.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
3,다마9599,8,3,12,6,1,3,2,3.0,5201,258,2.193231,1.613118,10.75,8.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
4,다마9599,9,2,31,6,1,3,3,10.333333,5201,258,2.193231,1.613118,10.75,16.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0


In [None]:
# gids with no row in the bus-stop table come out of the left join as NaN; treat them as 0.
# Assign the result back: chained fillna(inplace=True) on a selected column is deprecated
# in pandas 2.x and has no effect on the frame under Copy-on-Write.
merged_test['bus_st_count'] = merged_test['bus_st_count'].fillna(0)
merged_test.head()

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count,bus_st_count
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.5,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.0,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0


In [None]:
# Columns of the training frame that must hold numeric values.
numeric_columns = ['RIDE_DEMAND', 'ALIGHT_DEMAND', 'bus_st_count', 'val']

# Coerce the listed columns (only these, not the whole frame) to a numeric
# dtype one at a time; values that cannot be parsed become NaN.
for column_name in numeric_columns:
    merged_data[column_name] = pd.to_numeric(merged_data[column_name], errors='coerce')

In [None]:
# Print the dtype and non-null count of every column in the training frame.
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 539490 entries, 0 to 539489
Data columns (total 20 columns):
 #   Column            Non-Null Count   Dtype   
---  ------            --------------   -----   
 0   gid               539490 non-null  object  
 1   TIME              539490 non-null  int64   
 2   RIDE_DEMAND       539490 non-null  int64   
 3   ALIGHT_DEMAND     539490 non-null  int64   
 4   month             539490 non-null  int64   
 5   day               539490 non-null  int64   
 6   weekday           539490 non-null  int64   
 7   TimeOfDay         539490 non-null  category
 8   alight_ratio      539490 non-null  float64 
 9   Month_ALIGHT      539490 non-null  int64   
 10  DAY_ALIGHT        539490 non-null  int64   
 11  monthly_ratio     539490 non-null  float64 
 12  daily_ratio_mean  539490 non-null  float64 
 13  daily_avg_al      539490 non-null  float64 
 14  al_mov_avg        539490 non-null  float64 
 15  val               539490 non-null  float64 
 16  ge

In [None]:
# Remove alight_ratio from the training frame; merged_test carries no such
# column. errors='ignore' keeps the cell re-runnable: a second execution is a
# no-op instead of raising KeyError because the column is already gone.
merged_data = merged_data.drop(columns=['alight_ratio'], errors='ignore')

In [None]:
# Display the training frame (the notebook renders a truncated head/tail view).
merged_data

Unnamed: 0,gid,TIME,RIDE_DEMAND,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count,bus_st_count
0,다마9599,5,1,0,6,1,3,2,5201,258,2.193231,1.613118,10.750,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
1,다마9599,6,2,5,6,1,3,2,5201,258,2.193231,1.613118,10.750,2.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
2,다마9599,7,2,7,6,1,3,2,5201,258,2.193231,1.613118,10.750,4.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
3,다마9599,8,3,12,6,1,3,2,5201,258,2.193231,1.613118,10.750,8.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
4,다마9599,9,2,31,6,1,3,3,5201,258,2.193231,1.613118,10.750,16.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539485,라바0421,21,0,1,8,24,3,5,95,3,0.142030,0.141839,0.125,1.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
539486,라바0421,22,0,0,8,24,3,5,95,3,0.142030,0.141839,0.125,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
539487,라바0421,23,0,0,8,24,3,5,95,3,0.142030,0.141839,0.125,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
539488,라바0421,0,0,0,8,24,3,1,95,3,0.142030,0.141839,0.125,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0


In [None]:
# List the column names of the test frame.
merged_test.columns

Index(['gid', 'TIME', 'ALIGHT_DEMAND', 'month', 'day', 'weekday', 'TimeOfDay',
       'Month_ALIGHT', 'DAY_ALIGHT', 'monthly_ratio', 'daily_ratio_mean',
       'daily_avg_al', 'al_mov_avg', 'val', 'geometry', 'sh_count', 'bh_count',
       'bus_st_count'],
      dtype='object')

In [None]:
# Write the training frame to Google Drive as CSV.
# index=False leaves the row index out of the file.
file_path = '/content/drive/My Drive/Machine_Learnig_Project/merged_data.csv'
merged_data.to_csv(path_or_buf=file_path, index=False)

## Test 데이터

In [None]:
# Display the test frame (the notebook renders a truncated head/tail view).
merged_test

Unnamed: 0,gid,TIME,ALIGHT_DEMAND,month,day,weekday,TimeOfDay,Month_ALIGHT,DAY_ALIGHT,monthly_ratio,daily_ratio_mean,daily_avg_al,al_mov_avg,val,geometry,sh_count,bh_count,bus_st_count
0,다마9599,5,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
1,다마9599,6,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.500000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
2,다마9599,7,0,6,24,5,2,468,85,2.193231,1.613118,10.625,0.333333,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
3,다마9599,8,1,6,24,5,2,468,85,2.193231,1.613118,10.625,0.666667,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
4,다마9599,9,5,6,24,5,3,468,85,2.193231,1.613118,10.625,2.000000,0.0,"POLYGON ((995000.000 1799000.000, 995000.000 1...",0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161842,라바0421,21,1,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
161843,라바0421,22,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
161844,라바0421,23,0,8,31,3,5,21,2,0.142030,0.141839,0.250,0.333333,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0
161845,라바0421,0,0,8,31,3,1,21,2,0.142030,0.141839,0.250,0.000000,0.0,"POLYGON ((1004000.000 1821000.000, 1004000.000...",0.0,0.0,0.0


In [None]:
# List the column names of the test frame before it is written to disk.
merged_test.columns

Index(['gid', 'TIME', 'ALIGHT_DEMAND', 'month', 'day', 'weekday', 'TimeOfDay',
       'Month_ALIGHT', 'DAY_ALIGHT', 'monthly_ratio', 'daily_ratio_mean',
       'daily_avg_al', 'al_mov_avg', 'val', 'geometry', 'sh_count', 'bh_count',
       'bus_st_count'],
      dtype='object')

In [None]:
# Write the test frame to Google Drive as CSV.
# index=False leaves the row index out of the file.
file_path = '/content/drive/My Drive/Machine_Learnig_Project/merged_test.csv'
merged_test.to_csv(path_or_buf=file_path, index=False)