## 2025년 천안시 데이터 분석 공모전


✅주제: 천안시 산업재해와 지역 사회경제·환경·복지 요인(고용, 기상, 소비, 주거, 건강 등) 간의 관계를 시계열적으로 분석하고, <br>
<span style='margin:30px;'>이를 통해 산업안전 및 취약계층 보호를 위한 예측 모델을 구축</span><br><br>
✅자료출처: MDIS, 통계청<br><br>
✅분석순서
<ol> 
    <li>데이터 로드</li>
    <li>데이터 EDA</li>
    <li>변수 선정</li>
    <li>ARIMA, 머신러닝 등으로 시계열 예측하기</li>
</ol>


In [1]:
# 패키지 로드
import glob 
import pandas as pd

### 1. 데이터 로드

#### 👉 주관적 건강 수준 (입력데이터)
- 출처: 보건복지부
- 대상범위: 전국, 임금근로자
- 기간: 2019~2023년

In [55]:
# Load the "subjective health level" table.
# A forward slash is used because the backslash form contained the invalid
# escape sequence "\보" and only resolved as a path separator on Windows.
health = pd.read_csv('1. 기초자료/보건복지부_주관적_건강_수준.csv')

# Row 0 holds the response label of each column. Convert every known label to
# a 5 (best) .. 1 (worst) score; any other cell in that row is left as it is.
health_score = {
    '매우 좋다': 5,
    '좋은 편이다': 4,
    '보통이다': 3,
    '나쁜 편이다': 2,
    '매우 나쁘다': 1,
}
health.loc[0] = health.loc[0].map(lambda label: health_score.get(label, label))

# Keep the label row (its '특성별(2)' cell still reads '특성별(2)') together with
# the wage-worker ('임금근로자') row.
health = health[health['특성별(2)'].isin(['특성별(2)', '임금근로자'])].copy()
# Transpose so each original column becomes a row, dropping the first two
# (identifier) columns.
health = health.T.iloc[2:]
# Move the original column names out of the index into a regular column.
health = health.reset_index()
health.columns = ['날짜', '만족도', '응답비율']
# Original column names look like '2019', '2019.1', ...; keep the first four
# characters only.
health['날짜'] = health['날짜'].str.slice(0, 4)
health.head(3)

Unnamed: 0,날짜,만족도,응답비율
0,2019,5,16.4
1,2019,4,64.9
2,2019,3,16.4


#### 👉 삶에 대한 만족감 (입력데이터)
- 출처: 충남
- 대상범위: 충남(천안시)
- 기간: 2019~2023년

In [85]:
# Directory that holds the raw input files
root = '1. 기초자료/'
# One life-satisfaction CSV per survey year, 2019 through 2023
file_name = [f'삶에_대한_만족감_{year}.csv' for year in range(2019, 2024)]
# Clean one yearly file; the results are combined afterwards
def make_file(file_name, root_dir=None):
    """Load one yearly life-satisfaction CSV and reshape it to three columns.

    The file is read as UTF-8 first and re-read as cp949 if decoding fails.
    Only the repeated header row and the '천안시' row are kept, the table is
    transposed, and the first two (identifier) columns are dropped.

    Args:
        file_name: Name of the CSV file inside the data directory.
        root_dir: Directory containing the file. Defaults to the
            module-level ``root`` when omitted.

    Returns:
        A DataFrame with the columns '연도', '삶의대한만족도' and '비율_점',
        one row per original data column.

    Raises:
        FileNotFoundError: If the file does not exist.
        KeyError: If neither spelling of the region column is present.
    """
    import os

    if root_dir is None:
        root_dir = root
    path = os.path.join(root_dir, file_name)

    try:
        df = pd.read_csv(path)
    except UnicodeDecodeError:
        # Not valid UTF-8, so fall back to the Korean Windows code page.
        df = pd.read_csv(path, encoding='cp949')

    # The region column header is spelled with or without a space.
    region_col = '응답자특성별(2)'
    if region_col not in df.columns:
        region_col = '응답자 특성별(2)'
    df = df[df[region_col].isin(['천안시', region_col])].T.iloc[2:].copy()

    df = df.reset_index()
    df.columns = ['연도', '삶의대한만족도', '비율_점']
    # Original column names look like '2019', '2019.1', ...; keep the year part.
    df['연도'] = df['연도'].str.slice(0, 4)
    return df

# Reshape every yearly file, then stack the per-year tables into one
file_list = [make_file(yearly_csv) for yearly_csv in file_name]

life_satisfaction = pd.concat(file_list)
life_satisfaction.head(3)

Unnamed: 0,연도,삶의대한만족도,비율_점
0,2019,0 (전혀 만족하지 않는다) (%),0.6
1,2019,1 (%),0.8
2,2019,2 (%),2.1


#### 👉 건축허가 (입력데이터)
- 출처: 천안시
- 대상범위: 충남(천안시)
- 기간: 2019~2023년

In [90]:
# Load the building-permit table. A forward slash is used because the
# backslash form contained the invalid escape sequence "\천" and only
# resolved as a path separator on Windows.
dummy = pd.read_csv('1. 기초자료/천안시_건축허가.csv')
dummy

Unnamed: 0,건축용도별(1),항목,2018,2018.1,2018.2,2018.3,2018.4,2018.5,2018.6,2018.7,...,2023.18,2023.19,2023.20,2023.21,2023.22,2023.23,2023.24,2023.25,2023.26,2023.27
0,건축용도별(1),항목,합계,합계,합계,합계,합계,합계,합계,신축,...,증축·개축·이전·대수선,증축·개축·이전·대수선,증축·개축·이전·대수선,용도변경,용도변경,용도변경,용도변경,용도변경,용도변경,용도변경
1,건축용도별(1),항목,계,콘크리트,철골,조적,철골철근,목조,기타,계,...,철골철근,목조,기타,계,콘크리트,철골,조적,철골철근,목조,기타
2,합계,동수 (동),3141,1068,1894,41,25,85,28,2141,...,39,12,12,114,48,51,11,-,4,-
3,합계,연면적 (㎡),2559681,1669256,823939,3298,55219,6607,1362,2114005,...,78102,672,6323,220802,202698,16943,1085,-,76,-
4,주거용,동수 (동),912,548,299,7,3,53,2,856,...,1,6,-,9,7,-,2,-,-,-
5,주거용,연면적 (㎡),752958,711276,27123,859,7941,5555,204,744114,...,101,608,-,3337,3233,-,104,-,-,-
6,상업용,동수 (동),1057,360,654,22,8,9,4,662,...,4,6,5,98,39,46,9,-,4,-
7,상업용,연면적 (㎡),780450,603055,153504,1668,21608,600,15,634664,...,15142,64,371,211487,196946,13484,981,-,76,-
8,농수산용,동수 (동),535,8,499,7,2,-,19,305,...,-,-,-,-,-,-,-,-,-,-
9,농수산용,연면적 (㎡),323798,861,321199,653,-,-,1085,275891,...,-,-,-,-,-,-,-,-,-,-


#### 출력데이터

In [82]:
# Display the concatenated life-satisfaction table.
# NOTE(review): the recorded output below labels the second column '항목',
# while make_file names it '삶의대한만족도' — the output looks like it comes
# from an earlier run; re-run to confirm.
life_satisfaction

Unnamed: 0,연도,항목,비율_점
0,2019,0 (전혀 만족하지 않는다) (%),0.6
1,2019,1 (%),0.8
2,2019,2 (%),2.1
3,2019,3 (%),3.9
4,2019,4 (%),4.9
5,2019,5 (보통) (%),29.4
6,2019,6 (%),13.3
7,2019,7 (%),17.9
8,2019,8 (%),16.2
9,2019,9 (%),7.4


In [30]:
# Display the health table.
# NOTE(review): the recorded output below is the wide layout (one column per
# year/response), not the three-column table the load cell produces — it
# appears to predate the reshaping step; re-run to confirm.
health

Unnamed: 0,특성별(1),특성별(2),2019,2019.1,2019.2,2019.3,2019.4,2020,2020.1,2020.2,...,2023,2023.1,2023.2,2023.3,2023.4,2024,2024.1,2024.2,2024.3,2024.4
0,1,1,5.0,4.0,3.0,2.0,1.0,5,4.0,3,...,5.0,4.0,3.0,2.0,1.0,5,4.0,3.0,2,1
1,전체,소계,13.3,55.4,22.4,7.9,1.0,18.4,55.4,18.8,...,21.6,52.5,19.9,5.6,0.5,22.1,52.3,19.4,5.8,0.4
2,성별,남성,15.3,56.4,20.9,6.6,0.8,21.6,55.4,17.1,...,24.5,52.2,18.4,4.5,0.4,24.5,53.0,17.6,4.6,0.3
3,성별,여성,11.4,54.4,24.0,9.1,1.2,15.3,55.3,20.5,...,18.6,52.9,21.4,6.6,0.5,19.7,51.7,21.2,7.0,0.5
4,연령별,15~19세,38.2,56.0,5.3,0.5,0.0,38.5,58.2,2.3,...,55.3,41.7,2.7,0.0,0.3,53.0,44.5,2.5,-,-
5,연령별,20~29세,31.4,60.6,7.5,0.5,0.0,37.0,58.9,3.5,...,47.8,48.9,3.3,0.1,0.0,45.6,51.3,3.0,0.0,-
6,연령별,30~39세,21.5,67.6,9.8,1.0,0.1,30.5,60.9,7.0,...,35.8,58.7,5.1,0.3,0.0,36.5,59.0,4.3,0.2,0.1
7,연령별,40~49세,14.5,67.8,15.3,2.2,0.2,17.8,65.5,14.8,...,18.1,67.1,13.4,1.5,0.0,21.5,65.0,12.1,1.2,0.2
8,연령별,50~59세,5.7,61.5,26.7,5.5,0.5,10.8,62.7,22.4,...,10.7,62.8,23.4,3.0,0.2,12.7,62.5,21.0,3.6,0.2
9,연령별,60세 이상,1.7,37.3,38.1,20.1,2.8,3.5,38.3,36.9,...,4.0,39.4,39.9,15.4,1.3,5.0,38.4,39.9,15.6,1.0
