## Import Library 

In [37]:
# -*- coding: utf-8 -*-
# General Library
import warnings
warnings.filterwarnings('ignore')

import os, sys
import random
import numpy as np
import pandas as pd
from time import time
from glob import glob

import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning Library
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


feature_list = [
    '시간',
    '내부온도관측치', '내부습도관측치',
    'CO2관측치', 'EC관측치',
    '외부온도관측치', '외부습도관측치',
    '펌프상태', '펌프작동남은시간', '최근분무량', '일간누적분무량',
    '냉방상태', '냉방작동남은시간', '난방상태', '난방작동남은시간',
    '내부유동팬상태', '내부유동팬작동남은시간', '외부환기팬상태', '외부환기팬작동남은시간',
    '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도',
    '레드 LED상태', '레드 LED작동남은시간', '레드 LED동작강도',
    '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도',
    '카메라상태',
    '냉방온도', '난방온도', '기준온도', '난방부하', '냉방부하',
    '총추정광량', '백색광추정광량', '적색광추정광량', '청색광추정광량'
]


In [38]:
# Setting Korean font
from sys import platform
if platform == "linux" or platform == "linux2":
    plt.rc('font', family='NanumBarunGothic')
else:
    plt.rc('font', family='Malgun Gothic')
plt.rc('axes', unicode_minus=False)

In [39]:
seed_number = 42
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(seed_number)

In [40]:
dataset_path = 'C:/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun'

train_input_list = sorted(glob(f'{dataset_path}/data/train_input/*.csv'))
train_target_list = sorted(glob(f'{dataset_path}/data/train_target/*.csv'))
test_input_list = sorted(glob(f'{dataset_path}/data/test_input/*.csv'))
test_target_list = sorted(glob(f'{dataset_path}/data/test_target/*.csv'))

In [41]:
pd.read_csv(train_input_list[0]).columns, len(pd.read_csv(train_input_list[0]).columns)

(Index(['시간', '내부온도관측치', '내부습도관측치', 'CO2관측치', 'EC관측치', '외부온도관측치', '외부습도관측치',
        '펌프상태', '펌프작동남은시간', '최근분무량', '일간누적분무량', '냉방상태', '냉방작동남은시간', '난방상태',
        '난방작동남은시간', '내부유동팬상태', '내부유동팬작동남은시간', '외부환기팬상태', '외부환기팬작동남은시간',
        '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도', '레드 LED상태', '레드 LED작동남은시간',
        '레드 LED동작강도', '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도', '카메라상태', '냉방온도',
        '난방온도', '기준온도', '난방부하', '냉방부하', '총추정광량', '백색광추정광량', '적색광추정광량',
        '청색광추정광량'],
       dtype='object'),
 38)

### 데이터 병합 및 컬럼 수정 

In [42]:
import argparse

In [43]:
parser = argparse.ArgumentParser(description="baseline")
parser.add_argument('--best_n', default=8, type=int)
parser.add_argument('--missing', default="f", type=str) # b, f, i
parser.add_argument('--scaler', default="robust", type=str) # standard or minmax or robust
parser.add_argument('--cv', default=10, type=int)
parser.add_argument('--seed', default=1011, type=int)
args = parser.parse_args('')

best_n = args.best_n
missing = args.missing
scaler = args.scaler
cv = args.cv
seed = args.seed

In [44]:
from tqdm import tqdm
import datetime

feature_list = [
    '시간',
    '내부온도관측치', '내부습도관측치',
    'CO2관측치', 'EC관측치',
    '외부온도관측치', '외부습도관측치',
    '펌프상태', '펌프작동남은시간', '최근분무량', '일간누적분무량',
    '냉방상태', '냉방작동남은시간', '난방상태', '난방작동남은시간',
    '내부유동팬상태', '내부유동팬작동남은시간', '외부환기팬상태', '외부환기팬작동남은시간',
    '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도',
    '레드 LED상태', '레드 LED작동남은시간', '레드 LED동작강도',
    '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도',
    '카메라상태',
    '냉방온도', '난방온도', '기준온도', '난방부하', '냉방부하',
    '총추정광량', '백색광추정광량', '적색광추정광량', '청색광추정광량'
]


def preprocess_data(input_paths, target_paths):
    
    idx = 0
    
    temp_df = pd.DataFrame()
    for input_path, target_path in tqdm(zip(input_paths, target_paths)):
        
        idx+=1
        
        input_df = pd.read_csv(input_path) # input 
        target_df = pd.read_csv(target_path) # target 
        
        input_df.columns = feature_list  # 
        
        input_df["시간"] = pd.to_datetime(input_df["시간"]) + datetime.timedelta(days=1)
        input_df["year"] = input_df["시간"].dt.year
        input_df["month"] = input_df["시간"].dt.month
        input_df["day"] = input_df["시간"].dt.day
        
        target_df["시간"] = pd.to_datetime(target_df["시간"]) 
        target_df["year"] = input_df["시간"].dt.year
        target_df["month"] = target_df["시간"].dt.month
        target_df["day"] = target_df["시간"].dt.day
        
        input_df = input_df.groupby(by=["year", "month", "day"]).median().reset_index()
        target_df = target_df.groupby(by=["year", "month", "day"]).median().reset_index()
        
        if missing == "b":
            input_df = input_df.fillna(method='bfill')
        if missing == "f":
            input_df = input_df.fillna(method='ffill')
        if missing == "i":
            input_df = input_df.interpolate()
        
        input_df["time"] = np.arange(0, len(input_df))
        input_df["case"] = idx
        df = pd.merge(input_df, target_df) # 시간 기준 rate 합쳐줌 
        temp_df = pd.concat([temp_df, df], axis=0)
        
    temp_df = temp_df.reset_index(drop=True)    
    return temp_df

train_df = preprocess_data(train_input_list, train_target_list)
test_df = preprocess_data(test_input_list, test_target_list)

58it [00:35,  1.65it/s]
6it [00:03,  1.68it/s]


```
Dataset Info.

train_input [폴더] - 총 58개 청경채 케이스
각 청경채 케이스 별 환경 데이터 (1분 간격)


train_target [폴더] - 총 58개 청경채 케이스
rate : 각 청경채 케이스 별 잎 면적 증감률 (1일 간격)


test_input [폴더] - 총 6개 청경채 케이스
각 청경채 케이스 별 환경 데이터 (1분 간격)


test_target [폴더] - 총 6개 청경채 케이스
rate : 각 청경채 케이스 별 잎 면적 증감률 (1일 간격)
제출을 위한 양식으로 label에 해당되는 rate의 값은 모두 0으로 가려져있습니다.


submission 은 각 케이스 별로 쪼갠 rate값에 대한 RMSE*100 값을 입력하여 제출
```



In [45]:
train_df.shape , test_df.shape

((1813, 43), (195, 43))

In [46]:
test_df.head()

Unnamed: 0,year,month,day,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,...,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량,time,case,rate
0,2021,3,27,27.6,24.200001,486.0,1.825927,28.4,18.4,0.0,...,22.165345,0.0,18.999996,0.0,0.0,0.0,0.0,0,1,0.0
1,2021,3,28,29.299999,40.200001,474.0,1.874167,30.200001,21.299999,0.0,...,22.150231,0.0,25.618236,0.0,0.0,0.0,0.0,1,1,0.0
2,2021,3,29,32.0,44.700001,432.0,1.854575,35.299999,26.1,0.0,...,22.175592,0.0,41.499996,0.0,0.0,0.0,0.0,2,1,0.0
3,2021,3,30,30.5,43.0,445.0,1.894089,34.700001,18.700001,0.0,...,20.5,0.0,41.499996,0.0,0.0,0.0,0.0,3,1,0.0
4,2021,3,31,30.6,39.099998,440.0,1.909895,31.75,10.2,0.0,...,22.179347,0.0,25.500002,102.1053,102.1053,0.0,0.0,4,1,0.0


Target이 Rate

- 외부 온도 관측치, 외부 습도 관측치 결측치 발생

In [47]:
train_df.columns

Index(['year', 'month', 'day', '내부온도관측치', '내부습도관측치', 'CO2관측치', 'EC관측치',
       '외부온도관측치', '외부습도관측치', '펌프상태', '펌프작동남은시간', '최근분무량', '일간누적분무량', '냉방상태',
       '냉방작동남은시간', '난방상태', '난방작동남은시간', '내부유동팬상태', '내부유동팬작동남은시간', '외부환기팬상태',
       '외부환기팬작동남은시간', '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도', '레드 LED상태',
       '레드 LED작동남은시간', '레드 LED동작강도', '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도',
       '카메라상태', '냉방온도', '난방온도', '기준온도', '난방부하', '냉방부하', '총추정광량', '백색광추정광량',
       '적색광추정광량', '청색광추정광량', 'time', 'case', 'rate'],
      dtype='object')

In [48]:
train_df

Unnamed: 0,year,month,day,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,...,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량,time,case,rate
0,2021,2,18,22.299999,26.700001,397.0,0.917129,19.100000,10.000000,0.0,...,20.705163,0.0,1.000004,0.0000,0.00000,0.0000,0.00000,0,1,0.50000
1,2021,2,19,21.900000,33.299999,357.0,0.910134,17.700001,11.100000,0.0,...,20.665922,0.0,0.000000,0.0000,0.00000,0.0000,0.00000,1,1,0.66667
2,2021,2,20,22.150001,31.349999,354.0,0.910134,18.100000,13.400000,0.0,...,20.666866,0.0,0.000000,0.0000,0.00000,0.0000,0.00000,2,1,0.60000
3,2021,2,21,23.600000,23.100000,349.0,0.910134,20.200001,20.299999,0.0,...,20.665137,0.0,1.999998,0.0000,0.00000,0.0000,0.00000,3,1,-0.12500
4,2021,2,22,24.650001,44.799999,373.0,0.910134,22.000000,22.500000,0.0,...,20.675179,0.0,3.999996,0.0000,0.00000,0.0000,0.00000,4,1,1.42857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1808,2022,6,11,23.799999,65.800003,459.0,0.011633,25.200001,41.500000,0.0,...,15.428755,0.0,40.500002,111.7692,95.91710,6.6192,6.26600,20,58,-0.07888
1809,2022,6,12,23.500000,64.199997,505.0,0.010857,25.200001,41.500000,0.0,...,15.425492,0.0,40.500002,111.7692,95.91710,6.6192,4.69950,21,58,-0.15838
1810,2022,6,13,23.250000,63.200001,415.5,0.010082,25.200001,41.500000,0.0,...,15.433483,0.0,38.999996,111.3481,95.91710,6.6192,5.48275,22,58,-0.47374
1811,2022,6,14,23.299999,63.200001,479.0,0.010082,25.200001,41.500000,0.0,...,15.426235,0.0,36.999998,111.7692,99.01120,6.6192,6.26600,23,58,-0.51767


- 난방상태, 냉방상태 , 카메라 상태 펌프상태 펌프작동남은시간     

In [49]:
train_df = train_df.fillna(method="bfill") #  bfill로 할경우 결측값을 바로 아래 값과 동일하게 변경합니다. 
# 보간법의 원리 
test_df = test_df.fillna(method="bfill")

print(train_df.shape, test_df.shape)
print(train_df.isnull().sum().sum(),test_df.isnull().sum().sum())

(1813, 43) (195, 43)
0 0


### FE

In [51]:
train_df["spring"] = train_df["month"].apply(lambda x : 1 if x>=3 and x<=5 else 0)
train_df["summer"] = train_df["month"].apply(lambda x : 1 if x>=6 and x<=8 else 0)
train_df["fall"] = train_df["month"].apply(lambda x : 1 if x>=9 and x<=11 else 0)
train_df["winter"] = train_df["month"].apply(lambda x : 1 if x>=12 or x<=2 else 0)

test_df["spring"] = test_df["month"].apply(lambda x : 1 if x>=3 and x<=5 else 0)
test_df["summer"] = test_df["month"].apply(lambda x : 1 if x>=6 and x<=8 else 0)
test_df["fall"] = test_df["month"].apply(lambda x : 1 if x>=9 and x<=11 else 0)
test_df["winter"] = test_df["month"].apply(lambda x : 1 if x>=12 or x<=2 else 0)

train_df.shape, test_df.shape

((1813, 47), (195, 47))

In [52]:
ignore_features = ['펌프상태','난방상태','난방작동남은시간','내부유동팬상태','외부온도관측치','외부습도관측치',
                   '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도', '레드 LED상태', '레드 LED작동남은시간', 
                   '레드 LED동작강도', '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도', '카메라상태', '냉방온도', 
                   '난방온도', '기준온도', '난방부하', '냉방부하',"year", "day"] 

feature = list(set(train_df.columns.tolist()) - set(ignore_features))

train_df = train_df[feature]
test_df = test_df[feature]
train_df.shape, test_df.shape

((1813, 24), (195, 24))

In [53]:
feature

['냉방작동남은시간',
 '최근분무량',
 '외부환기팬상태',
 'case',
 'summer',
 '내부유동팬작동남은시간',
 'fall',
 '내부습도관측치',
 'CO2관측치',
 '총추정광량',
 '백색광추정광량',
 'spring',
 'month',
 'time',
 '적색광추정광량',
 '청색광추정광량',
 '내부온도관측치',
 '일간누적분무량',
 '펌프작동남은시간',
 '외부환기팬작동남은시간',
 'EC관측치',
 'winter',
 '냉방상태',
 'rate']

In [54]:
train_X = train_df.drop(columns = 'rate')
train_y = train_df['rate']

test_X = test_df.drop(columns='rate')
test_y = test_df['rate']


train_X.shape, test_X.shape, train_y.shape, test_y.shape

((1813, 23), (195, 23), (1813,), (195,))

In [55]:
# y Transform
IQR = np.quantile(train_y, 0.75) - np.quantile(train_y, 0.25)
min_value = np.quantile(train_y, 0.25) - IQR #* 1.5
max_value = np.quantile(train_y, 0.75) + IQR #* 1.5
train_y = train_y.apply(lambda x : x if x>min_value else min_value)
train_y = train_y.apply(lambda x : x if x<max_value else max_value)

train_y.shape

(1813,)

https://dacon.io/competitions/official/235961/codeshare/6591?page=1&dtype=recent

In [57]:
from sklearn.preprocessing import StandardScaler, RobustScaler
rb = RobustScaler()
train_X = rb.fit_transform(train_X)
test_X = rb.transform(test_X)

In [58]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

device = torch.device('cpu')


train_X = torch.FloatTensor(train_X)
test_X = torch.FloatTensor(test_X)
train_y = torch.FloatTensor(train_y).reshape(-1,1)

In [59]:
linear1 = torch.nn.Linear(train_X.shape[1], 512, bias=True)
linear3 = torch.nn.Linear(512, 256, bias=True)
linear4 = torch.nn.Linear(256, 128, bias=True)
linear5 = torch.nn.Linear(128, 64, bias=True)
linear6 = torch.nn.Linear(64, 32, bias=True)
linear7 = torch.nn.Linear(32, 10, bias=True)
linear8 = torch.nn.Linear(10, 1, bias=True)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.1)

In [60]:
model = torch.nn.Sequential(linear1,relu,
                            linear3,relu,
                            linear4, relu,
                            linear5, relu,
                            linear6, relu,
                            linear7, relu,
                            linear8).to(device)

In [61]:
# nn 패키지를 사용하여 모델과 손실 함수를 정의합니다.
loss_fn = torch.nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

torch.nn.init.xavier_normal_(linear1.weight)
torch.nn.init.xavier_normal_(linear2.weight)
torch.nn.init.xavier_normal_(linear3.weight)
torch.nn.init.xavier_normal_(linear4.weight)
torch.nn.init.xavier_normal_(linear5.weight)
torch.nn.init.xavier_normal_(linear6.weight)
torch.nn.init.xavier_normal_(linear7.weight)
torch.nn.init.xavier_normal_(linear8.weight)

Parameter containing:
tensor([[-0.1239,  0.3789, -0.2748,  0.2951,  0.0612,  0.2898,  0.2660, -0.4028,
         -1.1738,  0.6484]], requires_grad=True)

In [62]:
# 모델 학습
for epoch in range(500):
    
    # grad 초기화
    optimizer.zero_grad()
    
    # H(x)
    output = model(train_X)
    
    # cost 계산
    cost = loss_fn(output, train_y)
    
    # cost로 H(x) 개선
    cost.backward()
    optimizer.step()

    # epoch 50마다 cost, error 찍기
    if epoch % 50 == 0:
        # mape
        err = 100 * sum((train_y-output) ** 2) / len(train_y)
        
        # err = mean_squared_error(y_train, output.int())
        print(f"Epoch: {epoch} Cost: {cost:.4f} Error: {err.item():.4f}")

Epoch: 0 Cost: 5637.3735 Error: 563734.9375
Epoch: 50 Cost: 0.0525 Error: 5.2512
Epoch: 100 Cost: 0.0272 Error: 2.7202
Epoch: 150 Cost: 0.0230 Error: 2.2978
Epoch: 200 Cost: 0.0198 Error: 1.9824
Epoch: 250 Cost: 0.0187 Error: 1.8662
Epoch: 300 Cost: 0.0178 Error: 1.7761
Epoch: 350 Cost: 0.0171 Error: 1.7128
Epoch: 400 Cost: 0.0161 Error: 1.6145
Epoch: 450 Cost: 0.0156 Error: 1.5591


In [63]:
y_pred = model(test_X)
y_pred = y_pred.detach().numpy().flatten()

In [64]:
y_pred

array([ 4.49999452e-01,  2.51038790e-01,  2.40744621e-01,  2.45523810e-01,
        3.41639400e-01,  2.93431520e-01,  2.96560943e-01,  2.91573703e-01,
        3.07601571e-01,  2.89110005e-01,  2.82938957e-01,  2.92678952e-01,
        2.82285690e-01,  2.94126272e-01,  2.82851756e-01,  2.90439099e-01,
        2.71272957e-01,  2.60990143e-01,  2.30048865e-01,  2.59990036e-01,
        2.66768694e-01,  2.51295120e-01,  2.17436865e-01,  2.66827822e-01,
        2.55916446e-01,  2.45303929e-01,  2.15218157e-01,  1.61802620e-01,
        9.24175233e-02,  3.92634332e-01,  3.87614131e-01,  3.83087516e-01,
        3.79704475e-01,  3.75331938e-01,  3.66515219e-01,  3.58719826e-01,
        3.49904299e-01,  3.33269894e-01,  3.02597165e-01,  2.94513524e-01,
        2.91870654e-01,  2.78298438e-01,  2.58221090e-01,  2.39664704e-01,
        2.29472682e-01,  2.12477937e-01,  1.97860867e-01,  1.80150464e-01,
        2.16303408e-01,  1.64283574e-01,  1.27360672e-01,  1.28747419e-01,
        1.19841665e-01,  

In [65]:
base_path = f'C:/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data'

In [66]:
submit_path = f'{base_path}/sample_submission'

submit_df1 = pd.read_csv(f"{submit_path}/TEST_01.csv")
submit_df1.head()

Unnamed: 0,시간,rate
0,2021-03-27 00:00:00,0.57618
1,2021-03-28 00:00:00,0.261682
2,2021-03-29 00:00:00,0.347749
3,2021-03-30 00:00:00,0.278188
4,2021-03-31 00:00:00,0.419071


In [67]:
# ((29, 2), (35, 2), (26, 2), (32, 2), (37, 2), (36, 2))
submit_df1 = pd.read_csv(f"{submit_path}/TEST_01.csv")
submit_df1["rate"] = y_pred[:29]
submit_df1.to_csv(f"{submit_path}/TEST_01.csv", index=False)

submit_df2 = pd.read_csv(f"{submit_path}/TEST_02.csv")
submit_df2["rate"] = y_pred[29:29+35]
submit_df2.to_csv(f"{submit_path}/TEST_02.csv", index=False)

submit_df3 = pd.read_csv(f"{submit_path}/TEST_03.csv")
submit_df3["rate"] = y_pred[29+35:29+35+26]
submit_df3.to_csv(f"{submit_path}/TEST_03.csv", index=False)

submit_df4 = pd.read_csv(f"{submit_path}/TEST_04.csv")
submit_df4["rate"] = y_pred[29+35+26:29+35+26+32]
submit_df4.to_csv(f"{submit_path}/TEST_04.csv", index=False)

submit_df5 = pd.read_csv(f"{submit_path}/TEST_05.csv")
submit_df5["rate"] = y_pred[29+35+26+32:29+35+26+32+37]
submit_df5.to_csv(f"{submit_path}/TEST_05.csv", index=False)

submit_df6 = pd.read_csv(f"{submit_path}/TEST_06.csv")
submit_df6["rate"] = y_pred[29+35+26+32+37:]
submit_df6.to_csv(f"{submit_path}/TEST_06.csv", index=False)


submit_df1.shape, submit_df2.shape, submit_df3.shape, submit_df4.shape, submit_df5.shape, submit_df6.shape

((29, 2), (35, 2), (26, 2), (32, 2), (37, 2), (36, 2))

In [68]:
submit_df1

Unnamed: 0,시간,rate
0,2021-03-27 00:00:00,0.449999
1,2021-03-28 00:00:00,0.251039
2,2021-03-29 00:00:00,0.240745
3,2021-03-30 00:00:00,0.245524
4,2021-03-31 00:00:00,0.341639
5,2021-04-01 00:00:00,0.293432
6,2021-04-02 00:00:00,0.296561
7,2021-04-03 00:00:00,0.291574
8,2021-04-04 00:00:00,0.307602
9,2021-04-05 00:00:00,0.28911


In [69]:
test_target_list = [f'{i[1:]}'.replace('\\','/') for i in test_target_list]

In [70]:
test_target_list

[':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_01.csv',
 ':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_02.csv',
 ':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_03.csv',
 ':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_04.csv',
 ':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_05.csv',
 ':/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data/test_target/TEST_06.csv']

In [71]:
import zipfile
base_path = 'C:/Users/sanga/Desktop/O9O9/5959-deeplearning-project/jeongeun/data'
os.chdir(f'{base_path}/sample_submission/')
submission = zipfile.ZipFile(f"{submit_path}/sample_submission_1.zip", 'w')
for path in test_target_list:
    path = path.split('/')[-1]
    submission.write(path)
submission.close()