## 1. GL 불러오고 정형화시키기

### ① 칼럼명 테이블 생성

In [43]:
import pandas as pd
from pandas import *

pd.set_option('display.width', 200)   # 한 줄에 허용되는 너비
pd.set_option('display.max_columns', None) # 모든 컬럼 보이게

df = pd.read_csv('level1_sample_data/GL_Detail.csv', encoding='utf-8', sep=',', nrows=4)


df_columns = df.T.reset_index()

df_columns.columns = ['columnsName', 'record1', 'record2', 'record3', 'record4']

df_columns

Unnamed: 0,columnsName,record1,record2,record3,record4
0,Journal_ID,100000000,100000000,100000001,100000001
1,Journal_ID_Line_Number,1,2,1,2
2,JE_Line_Description,Postkosten ohne Tel.,,Reisekst./Unterkunft,
3,Business_Unit_Code,9900.0,,9900.0,
4,Fiscal_Year,2007,2007,2007,2007
5,GL_Account_Number,473000,113100,474210,113100
6,Amount,9770.52,9770.52,5875.2,5875.2
7,Amount_Credit_Debit_Indicator,S,H,S,H
8,Amount_Currency,USD,USD,USD,USD
9,JE_Header_ Description,,,,


### ② 칼럼명 특수문자 제거

In [44]:
df_columns['columnsNameClean'] = (
    df_columns['columnsName']
        .str.replace(' ', '')
        .str.replace('(', '')
        .str.replace(')', '')
        .str.replace('-', '')
        .str.replace('/', '')
        .str.replace('.', '')
        .str.replace(',', '')
)

df_columns

Unnamed: 0,columnsName,record1,record2,record3,record4,columnsNameClean
0,Journal_ID,100000000,100000000,100000001,100000001,Journal_ID
1,Journal_ID_Line_Number,1,2,1,2,Journal_ID_Line_Number
2,JE_Line_Description,Postkosten ohne Tel.,,Reisekst./Unterkunft,,JE_Line_Description
3,Business_Unit_Code,9900.0,,9900.0,,Business_Unit_Code
4,Fiscal_Year,2007,2007,2007,2007,Fiscal_Year
5,GL_Account_Number,473000,113100,474210,113100,GL_Account_Number
6,Amount,9770.52,9770.52,5875.2,5875.2,Amount
7,Amount_Credit_Debit_Indicator,S,H,S,H,Amount_Credit_Debit_Indicator
8,Amount_Currency,USD,USD,USD,USD,Amount_Currency
9,JE_Header_ Description,,,,,JE_Header_Description


### ③ 표준칼럼명 설정

In [45]:
df_columns['columnsNameStandard'] = [
    "Journal_ID",
    "Journal_ID_Line_Number",
    "JE_Line_Description",
    "Organization_Department_Code",
    "Fiscal_Year",
    "Account_Code",
    "Amount",
    "Debit_Credit_Indicator",
    "Currency",
    "Journal_Header_Description",
    "Entered_By",
    "Document_Date",
    "Entry_Date",
    "Entry_Time"
]

df_columns

Unnamed: 0,columnsName,record1,record2,record3,record4,columnsNameClean,columnsNameStandard
0,Journal_ID,100000000,100000000,100000001,100000001,Journal_ID,Journal_ID
1,Journal_ID_Line_Number,1,2,1,2,Journal_ID_Line_Number,Journal_ID_Line_Number
2,JE_Line_Description,Postkosten ohne Tel.,,Reisekst./Unterkunft,,JE_Line_Description,JE_Line_Description
3,Business_Unit_Code,9900.0,,9900.0,,Business_Unit_Code,Organization_Department_Code
4,Fiscal_Year,2007,2007,2007,2007,Fiscal_Year,Fiscal_Year
5,GL_Account_Number,473000,113100,474210,113100,GL_Account_Number,Account_Code
6,Amount,9770.52,9770.52,5875.2,5875.2,Amount,Amount
7,Amount_Credit_Debit_Indicator,S,H,S,H,Amount_Credit_Debit_Indicator,Debit_Credit_Indicator
8,Amount_Currency,USD,USD,USD,USD,Amount_Currency,Currency
9,JE_Header_ Description,,,,,JE_Header_Description,Journal_Header_Description


### ④ 표준칼럼명으로 데이터불러오기

In [46]:
# ① 표준칼럼명에 값 존재하는 애들만 추출
using_columns = [df_columns['columnsName'][i] for i, d in enumerate(df_columns['columnsNameStandard']) if d]
print(using_columns)

# ② "①"에 존재하는 애들만 추출
df = pd.read_csv('level1_sample_data/GL_Detail.csv', encoding='utf-8', sep=',', usecols=using_columns)

# ③ 표준칼럼명으로 변경
df.columns = [df_columns['columnsNameStandard'][i] for i, d in enumerate(df_columns['columnsNameStandard']) if d]

df

['Journal_ID', 'Journal_ID_Line_Number', 'JE_Line_Description', 'Business_Unit_Code', 'Fiscal_Year', 'GL_Account_Number', 'Amount', 'Amount_Credit_Debit_Indicator', 'Amount_Currency', 'JE_Header_ Description', 'Entered_By', 'Document_Date', 'Entered_Date', 'Entered_Time']


Unnamed: 0,Journal_ID,Journal_ID_Line_Number,JE_Line_Description,Organization_Department_Code,Fiscal_Year,Account_Code,Amount,Debit_Credit_Indicator,Currency,Journal_Header_Description,Entered_By,Document_Date,Entry_Date,Entry_Time
0,100000000,1,Postkosten ohne Tel.,9900,2007,473000,9770.52,S,USD,,STEINER,20070101,20070122,101205
1,100000000,2,,,2007,113100,9770.52,H,USD,,STEINER,20070101,20070122,101205
2,100000001,1,Reisekst./Unterkunft,9900,2007,474210,5875.20,S,USD,,STEINER,20070101,20070122,101206
3,100000001,2,,,2007,113100,5875.20,H,USD,,STEINER,20070101,20070122,101206
4,100000002,1,,9900,2007,474211,244.80,S,USD,,STEINER,20070101,20070122,101206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28155,5000000009,10,,,2007,792000,20000.00,S,USD,,I807424,20071214,20071214,204129
28156,5000000009,11,,,2007,191100,32000.00,H,USD,,I807424,20071214,20071214,204129
28157,5000000009,12,,,2007,231500,12000.00,S,USD,,I807424,20071214,20071214,204129
28158,5100000000,1,,9900,2007,160000,4000.00,H,USD,,I036867,20071010,20071010,100321


### ⑤ 숫자데이터 decimal로 변경하기 (float 에서 깊은 숫자의 차이 발생 오류를 막기 위함)

In [47]:
from decimal import Decimal

# 항상 소수점 아래 세 자리로 출력/계산 되도록 보장
df['Amount'] = [Decimal(i).quantize(Decimal('1.000')) for i in df['Amount']]

### ⑥ 차변금액, 대변금액 칼럼 삽입하기

In [48]:
df.insert(8, 'Debit_Amount', [i if d == 'S' else 0 for i, d in zip(df['Amount'], df['Debit_Credit_Indicator'])])
df.insert(9, 'Credit_Amount', [i if d == 'H' else 0 for i, d in zip(df['Amount'], df['Debit_Credit_Indicator'])])

df

Unnamed: 0,Journal_ID,Journal_ID_Line_Number,JE_Line_Description,Organization_Department_Code,Fiscal_Year,Account_Code,Amount,Debit_Credit_Indicator,Debit_Amount,Credit_Amount,Currency,Journal_Header_Description,Entered_By,Document_Date,Entry_Date,Entry_Time
0,100000000,1,Postkosten ohne Tel.,9900,2007,473000,9770.520,S,9770.520,0,USD,,STEINER,20070101,20070122,101205
1,100000000,2,,,2007,113100,9770.520,H,0,9770.520,USD,,STEINER,20070101,20070122,101205
2,100000001,1,Reisekst./Unterkunft,9900,2007,474210,5875.200,S,5875.200,0,USD,,STEINER,20070101,20070122,101206
3,100000001,2,,,2007,113100,5875.200,H,0,5875.200,USD,,STEINER,20070101,20070122,101206
4,100000002,1,,9900,2007,474211,244.800,S,244.800,0,USD,,STEINER,20070101,20070122,101206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28155,5000000009,10,,,2007,792000,20000.000,S,20000.000,0,USD,,I807424,20071214,20071214,204129
28156,5000000009,11,,,2007,191100,32000.000,H,0,32000.000,USD,,I807424,20071214,20071214,204129
28157,5000000009,12,,,2007,231500,12000.000,S,12000.000,0,USD,,I807424,20071214,20071214,204129
28158,5100000000,1,,9900,2007,160000,4000.000,H,0,4000.000,USD,,I036867,20071010,20071010,100321


## 2-1. GL로 TB 만들기

In [49]:
# Account code 별 그룹화 하여 집계데이터 만들기
groupby_df = df.groupby(['Account_Code'])

df_tb = pd.DataFrame({
    'Frequency': groupby_df['Debit_Amount'].count(),
    'Debit_Sum': groupby_df['Credit_Amount'].sum(),
    'Credit_Sum': groupby_df['Debit_Amount'].sum(),
})

# 모양 출력해보기
print(df_tb)

# 잔액칼럼 만들기

df_tb['Debit_Balance'] = [(i[0] - i[1]) if i[0] - i[1] > 0 else 0 for i in zip(df_tb['Debit_Sum'], df_tb['Credit_Sum'])]
df_tb['Credit_Balance'] = [(i[1] - i[0]) if i[1] - i[0] > 0 else 0 for i in zip(df_tb['Debit_Sum'], df_tb['Credit_Sum'])]

df_tb['Debit_Balance'] = [Decimal(i).quantize(Decimal('1.000')) for i in df_tb['Debit_Balance']]
df_tb['Credit_Balance'] = [Decimal(i).quantize(Decimal('1.000')) for i in df_tb['Credit_Balance']]

df_tb


              Frequency     Debit_Sum   Credit_Sum
Account_Code                                      
1010                132   2089562.000            0
11010                86    131247.000            0
21010                42     32840.000            0
32000                 2             0   317976.260
113100             5150  38743045.400  3938190.460
...                 ...           ...          ...
892000                1             0   100000.000
893015                6             0      815.000
893020                3     13000.000            0
894025                7             0    28704.500
895000               61   1714361.610     1424.690

[105 rows x 3 columns]


Unnamed: 0_level_0,Frequency,Debit_Sum,Credit_Sum,Debit_Balance,Credit_Balance
Account_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1010,132,2089562.000,0,2089562.000,0.000
11010,86,131247.000,0,131247.000,0.000
21010,42,32840.000,0,32840.000,0.000
32000,2,0,317976.260,0.000,317976.260
113100,5150,38743045.400,3938190.460,34804854.940,0.000
...,...,...,...,...,...
892000,1,0,100000.000,0.000,100000.000
893015,6,0,815.000,0.000,815.000
893020,3,13000.000,0,13000.000,0.000
894025,7,0,28704.500,0.000,28704.500


## 2-2. GL로 특이 분개 찾기

### ① 금액기준 특이분개 찾기

### ② 날짜기준 특이분개 찾기

### ③ 담당자기준 특이분개 찾기