## Get all actual data from the csv files, and set date as index.

In [1]:
import configparser
import pandas as pd
import ast
import glob
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [2]:
import constants as con

↑ constants.py file contains some items like below.
~~~ constants.py
csv_encoding = 'Shift_JISx0213'
csv_path = '..\\data\\**\\E_Detail*.csv'
necessary_columns = ['Date', 'Class-1', 'Class-2', 'Class-3', 'Col-1', 'Col-2', 'Col-3', 'Col-4', 'Col-5', 'Col-6', 'Col-7']
~~~

### Find all csv files in the directory.

In [3]:
csvs = glob.glob(con.csv_path)
print(csvs)

['data\\20190621\\E_Detail_20190621.csv', 'data\\20190622\\E_Detail_20190622.csv', 'data\\20190623\\E_Detail_20190623.csv', 'data\\20190624\\E_Detail_20190624.csv', 'data\\20190625\\E_Detail_20190625.csv', 'data\\20190626\\E_Detail_20190626.csv', 'data\\20190627\\E_Detail_20190627.csv', 'data\\20190628\\E_Detail_20190628.csv', 'data\\20190629\\E_Detail_20190629.csv', 'data\\20190630\\E_Detail_20190630.csv', 'data\\20190701\\E_Detail_20190701.csv', 'data\\20190702\\E_Detail_20190702.csv', 'data\\20190703\\E_Detail_20190703.csv']


### Get all data from csvs.

In [4]:
df = pd.DataFrame()
for csv in csvs:
    dfTmp = pd.read_csv(csv, encoding=con.csv_encoding)
    df = df.append(dfTmp[con.necessary_columns])

##### The DataFrame contains data like below.
For showing the DataFrame, I replaced the actual value with another.

In [5]:
df_show = df.rename(columns=con.shown_columns)
df_show['Class-1'] = df_show['Class-1'].map(con.shown_value_Class1)
df_show['Class-3'] = df_show['Class-3'].map(con.shown_value_Class3)
display(df_show[2185:2200])

Unnamed: 0,Date,Class-1,Class-2,Class-3,Col-1,Col-2,Col-3,Col-4,Col-5,Col-6,Col-7
498,19-06-25,EN,37,MA,44591,0,0,0,0,0,0
499,19-06-25,EN,38,MA,44591,0,0,0,0,0,0
500,19-06-25,EN,39,SD,0,0,0,0,0,0,0
0,19-06-26,JA,1,MA,63277,0,0,0,0,0,0
1,19-06-26,JA,2,MA,63277,0,0,0,0,0,0
2,19-06-26,JA,3,MA,63277,0,0,0,0,0,0
3,19-06-26,JA,4,SD,8430,0,0,0,0,0,0
4,19-06-26,JA,5,SD,8417,0,0,0,0,0,0
5,19-06-26,JA,6,SD,8401,0,0,0,0,0,0
6,19-06-26,JA,7,SD,100,11820,0,4278,0,0,0


### Multiply each value by the each KPI rate then make KPI.
kpi_rate is defined in the constant.py file like below.
~~~ constants.py
kpi_rate = [1, 0.5, 0.6, 0.5, 0.6, 0.6, 0.6]
~~~

In [6]:
df[con.kpi_columns] = df[con.kpi_columns] * np.array(con.kpi_rate)

### Add a column for sum of KPI.

In [7]:
df[con.col_total] = df[con.kpi_columns].sum(axis=1)
#display(df[2185:2200])

##### The DataFrame contains data like below.
For showing the DataFrame, I replaced the actual value with another.

In [10]:
df_show = df.rename(columns=con.shown_columns)
df_show['Class-1'] = df_show['Class-1'].map(con.shown_value_Class1)
df_show['Class-3'] = df_show['Class-3'].map(con.shown_value_Class3)
display(df_show[2185:2200])

Unnamed: 0,Date,Class-1,Class-2,Class-3,Col-1,Col-2,Col-3,Col-4,Col-5,Col-6,Col-7,KPI
498,19-06-25,EN,37,MA,44591.0,0.0,0.0,0.0,0.0,0.0,0.0,44591.0
499,19-06-25,EN,38,MA,44591.0,0.0,0.0,0.0,0.0,0.0,0.0,44591.0
500,19-06-25,EN,39,SD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,19-06-26,JA,1,MA,63277.0,0.0,0.0,0.0,0.0,0.0,0.0,63277.0
1,19-06-26,JA,2,MA,63277.0,0.0,0.0,0.0,0.0,0.0,0.0,63277.0
2,19-06-26,JA,3,MA,63277.0,0.0,0.0,0.0,0.0,0.0,0.0,63277.0
3,19-06-26,JA,4,SD,8430.0,0.0,0.0,0.0,0.0,0.0,0.0,8430.0
4,19-06-26,JA,5,SD,8417.0,0.0,0.0,0.0,0.0,0.0,0.0,8417.0
5,19-06-26,JA,6,SD,8401.0,0.0,0.0,0.0,0.0,0.0,0.0,8401.0
6,19-06-26,JA,7,SD,100.0,5910.0,0.0,2139.0,0.0,0.0,0.0,8149.0


### <font color='FFFFFF'>Calculete the sum of the KPI by each day, each Subject and each Question Number.</font>
In the case of "M+目視", the record is separated by each mark values.
That's why I should "groupby" with Question Number.

In [9]:
df = df.groupby(ast.literal_eval(ini.get('header_detail', 'classify')))[ini.get('header_detail', 'kpi')].sum()


NameError: name 'ini' is not defined