# Load packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import os
import seaborn as sns

from glob import glob
from matplotlib import pyplot as plt

# Configuration

In [None]:

# Korean font: switch matplotlib to Malgun Gothic when the Windows font file is
# present so that Hangul text in labels renders; otherwise keep the default font.
from matplotlib import font_manager, rc

font_path = "C:/Windows/Fonts/malgun.TTF"
if os.path.isfile(font_path):
    try:
        font = font_manager.FontProperties(fname=font_path).get_name()
        rc('font', family=font)
    except (OSError, RuntimeError) as font_err:
        # Best effort: an unreadable/corrupt font file must not stop the notebook,
        # but say so instead of silently hiding the failure.
        print(f'Korean font not applied ({font_err}); using the default font.')

# Fix minus presentation: draw negative signs with the ASCII hyphen instead of
# the Unicode minus glyph (presumably because the configured font may lack it).
mpl.rcParams['axes.unicode_minus'] = False

# Load data

In [None]:
# Relative location of the daily KOSPI csv export (file name carries the covered date range)
path_kospi = (
    "data/external_data/"
    "kospi_20060102_20220701.csv"
)

In [None]:
# Read the daily KOSPI history (20060102 ~ 20220701); the csv is decoded as EUC-KR
df_kospi = pd.read_csv(
    filepath_or_buffer=path_kospi,
    encoding='euc-kr',
)
df_kospi

# Preprocessing

In [None]:
# Give the two Korean-named columns used later ('일자', '종가') English names
korean_to_english = {'일자': 'Date', '종가': 'Close'}
df_kospi.rename(columns=korean_to_english, inplace=True)
df_kospi.head()

In [None]:
# Change date column type as pandas datetime
df_kospi['Date'] = pd.to_datetime(df_kospi['Date'])

# Assign year, month, day columns
df_kospi['Year'] = df_kospi['Date'].dt.year
df_kospi['Month'] = df_kospi['Date'].dt.month
df_kospi['Day'] = df_kospi['Date'].dt.day

# Assign class by month: May..October -> 'May-Oct', every other month -> 'Nov-Apr'.
# Vectorised on the Month column instead of a row-wise apply: same labels,
# no Python-level loop, and well-defined on an empty frame.
df_kospi['Class'] = np.where(
    df_kospi['Month'].isin([5, 6, 7, 8, 9, 10]), 'May-Oct', 'Nov-Apr'
)

df_kospi.head()

In [None]:
# Get average monthly kospi: mean Close per calendar month, one row per month,
# stamped with the month-end date. An explicit MonthEnd offset replaces the
# lowercase 'm' alias, which is ambiguous and deprecated in recent pandas.
df_mthly_avg_kospi = (
    df_kospi
    .resample(pd.offsets.MonthEnd(), on='Date')['Close']
    .mean()
    .reset_index()
    .rename(columns={'Close': 'Avg_Close'})
)

# Month number of every row, reused by the class / year rules below
row_month = df_mthly_avg_kospi['Date'].dt.month

# Assign month class: May ~ Oct -> 'May-Oct' (category code 0); Nov ~ Apr -> 'Nov-Apr' (code 1)
df_mthly_avg_kospi['Class'] = np.where(
    row_month.isin([5, 6, 7, 8, 9, 10]), 'May-Oct', 'Nov-Apr'
)
df_mthly_avg_kospi['Class'] = df_mthly_avg_kospi['Class'].astype('category')

# Assign year and month column.
# Year is the year in which the season started: January..April are counted
# with the previous calendar year so a Nov-Apr season shares a single Year.
df_mthly_avg_kospi['Year'] = (
    df_mthly_avg_kospi['Date'].dt.year - row_month.isin([1, 2, 3, 4]).astype(int)
)
df_mthly_avg_kospi['Month'] = row_month

print(f'Class value counts \n{df_mthly_avg_kospi.Class.value_counts()}')
df_mthly_avg_kospi.head(15)

In [None]:
# Build the per-year, per-class KOSPI change table.
# Change = Close on the last trading day of a season - Close on its first trading day.


def _edge_rows(daily, month, keep):
    """Return, for every Year, the first or last row of one calendar month.

    Parameters
    ----------
    daily : DataFrame sorted ascending by 'Date', with 'Year' and 'Month' columns.
    month : int, calendar month to select (1-12).
    keep : 'first' for the earliest row of the month, 'last' for the latest.

    Returns
    -------
    DataFrame indexed by 'Year' with columns Date, Close, Day, Class, all taken
    from the same row (groupby().min()/.max() would instead reduce every column
    independently and return the monthly extreme Close, not the edge-day Close).
    """
    in_month = daily[daily['Month'] == month]
    edge = in_month.drop_duplicates(subset='Year', keep=keep)
    return edge.set_index('Year')[['Date', 'Close', 'Day', 'Class']]


# Chronological order is what makes "first"/"last" row mean first/last trading day
df_kospi_by_date = df_kospi.sort_values('Date')

# Get info
df_fst_may = _edge_rows(df_kospi_by_date, 5, 'first')
df_lst_oct = _edge_rows(df_kospi_by_date, 10, 'last')
df_fst_nov = _edge_rows(df_kospi_by_date, 11, 'first')
df_lst_apr = _edge_rows(df_kospi_by_date, 4, 'last')

# Get Last of Oct - First of May of each year
May_Oct_chg = (df_lst_oct.Close - df_fst_may.Close).dropna()

# Get Last of Apr(next year) - First of Nov of each year.
# Shifting the April index back one year lines April of Y+1 up with November of Y.
df_lst_apr.index -= 1
Nov_Apr_chg = (df_lst_apr.Close - df_fst_nov.Close).dropna()

# Assign kospi change by year and class.
# The table is built from the two year-indexed series (aligned on Year) rather
# than by positional chained assignment into a count column. Years lacking
# either season are dropped, which excludes the incomplete boundary years.
season_changes = pd.DataFrame({'May-Oct': May_Oct_chg, 'Nov-Apr': Nov_Apr_chg}).dropna()
df_kospi_chg_yr_cls = (
    season_changes
    .rename_axis('Year')
    .reset_index()
    .melt(id_vars='Year', var_name='Class', value_name='Change')
    .sort_values(['Year', 'Class'])
    .reset_index(drop=True)
)

# Assign sign
df_kospi_chg_yr_cls['Sign'] = np.where(df_kospi_chg_yr_cls['Change'] >= 0, 'Pos', 'Neg')

# Value assign check
print(f'May to Oct \n{May_Oct_chg.head()}')
print(f'Nov to Apr \n{Nov_Apr_chg.head()}')
print(f'Result \n{df_kospi_chg_yr_cls.head()}')

# Visualization

In [None]:
# plot - Monthly averaged kospi index by two month class
fig_mthly_avg_kospi, ax_mthly_avg_kospi = plt.subplots(1, 1, figsize=(14, 8))

# Pull the plotted columns out once instead of re-indexing the frame per segment
x_pos = df_mthly_avg_kospi.index.to_numpy()
y_avg = df_mthly_avg_kospi.Avg_Close.to_numpy()
class_codes = df_mthly_avg_kospi.Class.cat.codes.to_numpy()

# One short line per pair of consecutive months, so each segment can take the
# colour of the class of the month it starts from.
for i in range(len(x_pos) - 1):
    ax_mthly_avg_kospi.plot(
        x_pos[i:i + 2],
        y_avg[i:i + 2],
        color=f'C{class_codes[i]}',
        linewidth=2,
        marker='o',
        markersize=2,
    )

# Legend entries come from the category list itself: position in
# `cat.categories` is exactly the code used for the line colours above, so
# label and colour cannot drift apart (codes of `.unique()` are not guaranteed
# to match on every pandas version).
legend_labels = list(df_mthly_avg_kospi.Class.cat.categories)
legend_lines = [
    mpl.lines.Line2D([0], [0], color=f'C{code}', lw=2) for code in range(len(legend_labels))]

ax_mthly_avg_kospi.set_title('Monthly average KOSPI close by month class')
ax_mthly_avg_kospi.set_xlabel('Month (row position in df_mthly_avg_kospi)')
ax_mthly_avg_kospi.set_ylabel('Avg_Close')
ax_mthly_avg_kospi.legend(legend_lines, legend_labels, title='group');

In [None]:
# plot - Yearly change of Kospi by class
fig_chg_yr_cls, ax_chg_yr_cls = plt.subplots(2, 1, figsize=(16, 14))

# Top panel: change per year, one bar per class
sns.barplot(
    x='Year', y='Change', hue='Class', data=df_kospi_chg_yr_cls,
    ax=ax_chg_yr_cls[0]
)
ax_chg_yr_cls[0].grid(axis='x')
ax_chg_yr_cls[0].set_title('Change by Year and Class')

# Bottom panel: Nov-Apr change minus May-Oct change for the same Year.
# Pivoting aligns the two classes on Year before subtracting, so the result
# does not rely on both classes having the same rows in the same order.
chg_by_year = df_kospi_chg_yr_cls.pivot(index='Year', columns='Class', values='Change')
season_gap = (chg_by_year['Nov-Apr'] - chg_by_year['May-Oct']).dropna()

ax_chg_yr_cls[1].bar(season_gap.index, season_gap.values)
ax_chg_yr_cls[1].grid(axis='x')
ax_chg_yr_cls[1].set_title('[Nov-Apr]-[May-Oct]')

# Number of positive / negative changes per class. Kept as the final expression
# of the cell so the table is actually rendered (mid-cell it was discarded).
df_kospi_chg_yr_cls.groupby(['Class', 'Sign']).size().unstack()