In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.style.use('seaborn')
mpl.rc('font',family='Malgun Gothic') # 한글폰트 설정
mpl.rcParams['axes.unicode_minus'] = False # 마이너스 단위 설정

실데이터 가져오기

In [93]:
# 실제 데이터를 가져오는 사용자함수
def get_data(table_name):
    import pandas as pd
    import mariadb
    import sys

    # Connect to MariaDB Platform
    try:
        conn = mariadb.connect(
            user="root",
            password="root",
            host="localhost",
            port=3306,
            database="smart_factory"
        )
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)
    
    # Get Cursor
    cur = conn.cursor()
    cur.execute(f"select 수주일자, 거래처코드, 제품명, 지역16, 지역, 수주량 from `{table_name}`, `production_re` where `production_re`.`제품코드` = `{table_name}`.`제품코드`")
    x = cur.fetchall()
    df = pd.DataFrame(x,columns=['SOLDDATE','CUSTID','PRODNAME','REGION','REGION2','QUANT'])
    df.QUANT = df.QUANT.astype(int)
    return df

In [94]:
df_21 = get_data('contract')
df_21.loc[df_21.REGION == '0','REGION2'] = '태안'
df_21.loc[df_21.REGION == '0','REGION'] = '충청남도'
df_21

Unnamed: 0,SOLDDATE,CUSTID,PRODNAME,REGION,REGION2,QUANT
0,2021-02-22,2001102,PEMA-580FX,충청남도,금산,15000
1,2021-02-22,2001200,PEMA-580FX,충청남도,태안,20000
2,2021-02-22,2001300,AE,서울,송파,2000
3,2021-02-22,2001400,CSA5000,경상북도,경주,10000
4,2021-02-22,2001500,CSA5000,경상북도,성주,10000
...,...,...,...,...,...,...
1714,2021-04-21,2007400,PEMA-SR2000,인천,인천,10000
1715,2021-04-21,2007500,PEMA-SR2000,부산광역시,부산,8000
1716,2021-04-21,2007600,PEMA-HR1000,충청북도,청양,7000
1717,2021-04-21,2007700,PEMA-SN400,경기도남부,안성,10000


빈 날짜들 만들기

In [95]:
# 연도를 입력하면 전체 날짜가 포함된 날짜가 있는 데이터프레임 생성 (데이터 타입은 datetime)
# end date를 주면 거기까지만 생성하도록 한다.

def get_date(year=None,startdate = None, enddate=None):
    import datetime
    import numpy as np
    import pandas as pd 
    if startdate == None:
        date = str(year) + '0101'
        date = datetime.datetime.strptime(date,'%Y%m%d')
    else:
        date = startdate
        date = datetime.datetime.strptime(date,'%Y%m%d')
    if enddate != None:
        enddate = datetime.datetime.strptime(enddate,'%Y%m%d')

    li = []
    stop = 0
    stop_year = date.year + 1
    while stop == 0:
        li.append(date)
        date = date + datetime.timedelta(days=1)
        if date.year == stop_year:
            stop = 1
        elif date - datetime.timedelta(days=1) == enddate:
            stop = 1
    df = pd.DataFrame(np.array(li),columns=['SOLDDATE'])
    df.SOLDDATE = df.SOLDDATE.apply(lambda x: str(x.date()))
    return df

In [96]:
df_date_21 = get_date(2021,startdate='20210101',enddate='20211231')
df_date_21

Unnamed: 0,SOLDDATE
0,2021-01-01
1,2021-01-02
2,2021-01-03
3,2021-01-04
4,2021-01-05
...,...
360,2021-12-27
361,2021-12-28
362,2021-12-29
363,2021-12-30


In [97]:
df_21.CUSTID.unique()

array(['2001102', '2001200', '2001300', '2001400', '2001500', '2001600',
       '2001700', '2001800', '2002000', '2002100', '2002200', '2002300',
       '2002400', '2002500', '2002600', '2002700', '2002800', '2002900',
       '2003000', '2003100', '2003200', '2003300', '2003400', '2003500',
       '2003600', '2003700', '2003800', '2003900', '2004000', '2004100',
       '2004104', '2004200', '2004300', '2004400', '2004500', '2004600',
       '2004700', '2004800', '2004901', '2005000', '2005100', '2005200',
       '2005300', '2005400', '2005500', '2005600', '2005700', '2005800',
       '2005900', '2006000', '2006100', '2006200', '2006300', '2006400',
       '2006500', '2006600', '2006606', '2006609', '2006612', '2006700',
       '2006800', '2006900', '2007000', '2007100', '2007200', '2007302',
       '2007400', '2007500', '2007600', '2007700', '2007800'],
      dtype=object)

In [98]:
df_21.REGION.unique()

array(['충청남도', '서울', '경상북도', '경기도남부', '강원도', '인천', '대구광역시', '경상남도',
       '부산광역시', '충청북도', '울산광역시', '전라남도', '대전광역시', '경기도북부', '전라북도', '세종시'],
      dtype=object)

기상청 데이터 가져오기


In [99]:
# ## 수도권
# 서울 = ['서울','강서','송파','남서울','동서울','신림','풍납','성수']  ### 기상CODE 108 / (서울) 수도권기상청
# 경기도북부 = ['동두천','남양주','파주','덕소','양주','일산','의정부','김포','진접','포천','구리','고양','화도']  ### 기상CODE 98 / (동두천) 수도권기상청
# 경기도남부 = ['하남','용인','광주','오산','수원','평택','이천','동탄','안성','양평','화성','여주','안산','광명','성남','비봉','의왕','미사리']   ### 기상CODE 119 / (수원) 수도권기상청
# 인천 = ['인천','가좌','부천','송도','서인천','대산']  ### 기상CODE 112 / (인천) 수도권기상청

# ## 경상도 
# 부산광역시 = ['부산','정관','회동동','서부산']       ### 기상CODE / 159 (부산) 부산지방기상청
# 울산광역시 = ['울산','언양','온산','남부']  ### 기상CODE / 152 (울산) 울산기상대	
# 대구광역시 = ['대구','하빈']                ### 기상CODE / 143 (대구) 대구지방기상청
# 경상북도 = ['포항','경주','칠곡','울진','문경','안동','약목','경산','단촌','고령','영덕','예천','구미','영양','성주','남포항' ]  ### 기상CODE 278 / (의성) 대구지방기상청
# 경상남도 =['함안','진해','김해','의령','양산','합천','칠서','진주','산청','함양','창녕','창원','밀양','하동','진영','사천','마산']  ### 기상CODE 263 / (의령군) 창원기상대


# ## 전라도
# 전라북도 =['정읍','이서','익산','완주','군산','남원','부안','장수','임실','전주','오수','군위','순창' ]  ### 기상CODE 146 / (전주) 전주기상지청
# 전라남도 =['순천','담양','여수','나주','영광','화순','장흥','해남','광양','노화도','목포','장성','무안','구례','군산한전주']   ### 기상CODE 156 / (광주) 광주지방기상청


# ## 충청도
# 대전광역시 = ['대전','장동']  ### 기상CODE 133 / (대전) 대전지방기상청
# 세종시 = ['세종','연기']  ### 기상CODE 239 / (세종) 대전지방기상청
# 충청북도 = [ '청주','음성','충주','옥천','청원','진천','보은','단양','옥산' ]  ### 기상CODE 131 / (청주) 청주기상지청	
# 충청남도 = [ '서산','당진','계룡','천안','아산','영동','청양','홍성','금산','태안','공주','예산','천북','부여','논산','보령' ]  ### 기상CODE 129 / (서산) 홍성기상대


# ## 강원도
# 강원도 = ['강릉','동해','원주','삼척','철원','횡성','옥계','고성','양양','평창','춘천','영월']  ### 기상CODE 114 / (원주) 강원지방기상청	

def cat_location(x):
        if x == '서울': return 108
        elif x == '경기도북부': return 98
        elif x == '경기도남부': return 119
        elif x == '인천': return 112
        elif x == '부산광역시': return 159
        elif x == '대구광역시': return 143
        elif x == '울산광역시': return 152
        elif x == '경상북도': return 278
        elif x == '경상남도': return 263
        elif x == '전라북도': return 146
        elif x == '전라남도': return 156
        elif x == '대전광역시': return 133
        elif x == '세종시': return 239
        elif x == '충청북도': return 131
        elif x == '충청남도': return 129
        elif x == '강원도': return 114
        else: return 119 ## 해당 안될시 생판지역인 평택 기준 날씨로 변환

def loc_weather(start_date, end_date, location):
    import pandas as pd
    import requests
    import numpy as np

    start_date = start_date.replace('-','')
    end_date = end_date.replace('-','')

    location_code = cat_location(location)

    url = 'http://apis.data.go.kr/1360000/AsosDalyInfoService/getWthrDataList'

    params ={'serviceKey' : 'ZKOx0KH7l+PcSZZNRvuI54pjFf5gbYeIa1ccvoUcbzlwPA7ZRd9AqYB+V6++N/urN+9OncLmDH9MvqvMu5SKbg==', 
            'pageNo' : '1', 
            'numOfRows' : '999', 
            'dataType' : 'JSON', 
            'dataCd' : 'ASOS', 
            'dateCd' : 'DAY', 
            'startDt' : start_date,
            'endDt' : end_date, 
            'stnIds' : str(location_code) }


    response = requests.get(url, params=params).json()
    r_response = response.get("response")
    r_body = r_response.get("body")
    r_items = r_body.get("items")
    r_item = r_items.get("item")

    time = [] #일자
    tem = [] #온도
    hum = [] #습도

    for i in range(len(r_item)):
        time.append(r_item[i]['tm'])
        tem.append(r_item[i]['avgTa'])
        hum.append(r_item[i]['avgRhm'])

    time = pd.Series(time)
    tem = pd.Series(tem)
    hum = pd.Series(hum)

    data = [time, tem, hum]

    df = pd.concat(data, axis=1)

    df.columns = ['SOLDDATE','TEMP','HUM']
    df.replace('', 0,inplace=True)

    df['REGION'] = location
    df['SOLDDATE'] = pd.to_datetime(df['SOLDDATE'])
    df['TEMP'] = round(df['TEMP'].astype('float32'),1)
    df['HUM'] = round(df['HUM'].astype('float32'),1)

    return df

def make_weather_data(year):
    start_date = str(year) + '-01-01'
    end_date = str(year) + '-12-31'
    loc_li = ['서울','경기도북부','경기도남부','인천','부산광역시','대구광역시','울산광역시','경상북도','경상남도','전라북도','전라남도','대전광역시','세종시','충청북도','충청남도','강원도']
    globals()[f'weather_data_{year}'] = pd.DataFrame()
    
    for i in loc_li:
        globals()[f'weather_{i}'] = loc_weather(start_date,end_date,i)
        globals()[f'weather_data_{year}'] = pd.concat([globals()[f'weather_data_{year}'], globals()[f'weather_{i}']], axis=0)
    
    globals()[f'weather_data_{year}'].reset_index(drop=True,inplace=True)
    globals()[f'weather_data_{year}'].to_csv(f'./dataset/weather_data_{year}.csv', index=False)
    return globals()[f'weather_data_{year}']

In [100]:
# weather_data = make_weather_data(2021)
weather_data = pd.read_csv('./dataset/weather_data_2021.csv')
weather_data.SOLDDATE = weather_data.SOLDDATE.astype('str')
weather_data.head()

Unnamed: 0,SOLDDATE,TEMP,HUM,REGION
0,2021-01-01,-4.2,64.0,서울
1,2021-01-02,-5.0,38.5,서울
2,2021-01-03,-5.6,45.0,서울
3,2021-01-04,-3.5,51.400002,서울
4,2021-01-05,-5.5,52.799999,서울


In [101]:
weather_data

Unnamed: 0,SOLDDATE,TEMP,HUM,REGION
0,2021-01-01,-4.2,64.000000,서울
1,2021-01-02,-5.0,38.500000,서울
2,2021-01-03,-5.6,45.000000,서울
3,2021-01-04,-3.5,51.400002,서울
4,2021-01-05,-5.5,52.799999,서울
...,...,...,...,...
5835,2021-12-27,-7.8,59.799999,강원도
5836,2021-12-28,-3.8,72.800003,강원도
5837,2021-12-29,-0.2,72.900002,강원도
5838,2021-12-30,-2.7,45.400002,강원도


병합 하여 데이터프레임 구성


In [102]:
for region in weather_data.REGION.unique():
    for date in weather_data.SOLDDATE.unique():
        temp = weather_data.loc[(weather_data.REGION==region)&(weather_data.SOLDDATE==date),'TEMP'].values[0]
        hum = weather_data.loc[(weather_data.REGION==region)&(weather_data.SOLDDATE==date),'HUM'].values[0]
        df_21.loc[(df_21.REGION==region)&(df_21.SOLDDATE==date),'TEMP'] = round(temp,1)
        df_21.loc[(df_21.REGION==region)&(df_21.SOLDDATE==date),'HUM'] = round(hum,1)
df_21 = df_21[['SOLDDATE', 'CUSTID', 'PRODNAME', 'REGION', 'REGION2', 'TEMP', 'HUM', 'QUANT']]
df_21

Unnamed: 0,SOLDDATE,CUSTID,PRODNAME,REGION,REGION2,TEMP,HUM,QUANT
0,2021-02-22,2001102,PEMA-580FX,충청남도,금산,6.500000,67.599998,15000
1,2021-02-22,2001200,PEMA-580FX,충청남도,태안,6.500000,67.599998,20000
2,2021-02-22,2001300,AE,서울,송파,7.800000,58.299999,2000
3,2021-02-22,2001400,CSA5000,경상북도,경주,9.600000,48.599998,10000
4,2021-02-22,2001500,CSA5000,경상북도,성주,9.600000,48.599998,10000
...,...,...,...,...,...,...,...,...
1714,2021-04-21,2007400,PEMA-SR2000,인천,인천,16.900000,52.900002,10000
1715,2021-04-21,2007500,PEMA-SR2000,부산광역시,부산,18.600000,44.299999,8000
1716,2021-04-21,2007600,PEMA-HR1000,충청북도,청양,20.299999,34.900002,7000
1717,2021-04-21,2007700,PEMA-SN400,경기도남부,안성,17.799999,50.900002,10000


착공면적을 가져와 병합해주는 사용자함수 (최종적으로 사용할 예정)

In [103]:
# 실제 데이터를 가져오는 사용자함수
def get_construction(table_name):
    import pandas as pd
    import mariadb
    import sys

    # Connect to MariaDB Platform
    try:
        conn = mariadb.connect(
            user="root",
            password="root",
            host="localhost",
            port=3306,
            database="smart_factory"
        )
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)
    
    # Get Cursor
    cur = conn.cursor()
    cur.execute(f"select 연도, 월, 총계, 전월대비증감율 from `{table_name}`")
    x = cur.fetchall()
    df = pd.DataFrame(x,columns=['YEAR','MONTH','CONSTRUCTION','CONSTRUCTION_RATE'])
    return df

In [104]:
df_construction = get_construction('building_construction')
df_construction.head()

Unnamed: 0,YEAR,MONTH,CONSTRUCTION,CONSTRUCTION_RATE
0,2019,3,10659525.0,42.7
1,2019,4,11014065.0,3.3
2,2019,5,11100662.0,0.8
3,2019,6,9165649.0,-17.4
4,2019,7,8936557.0,-2.5


In [105]:
df_21['CONSTRUCTION'] = 0
df_21['CONSTRUCTION_RATE'] = 0
df_21.SOLDDATE = df_21.SOLDDATE.astype('datetime64')
for i in df_21.index:
    year = df_21.loc[i,'SOLDDATE'].year
    month = df_21.loc[i,'SOLDDATE'].month
    df_21.loc[i,'CONSTRUCTION'] = df_construction[(df_construction['YEAR']==year) & (df_construction['MONTH']==month)].CONSTRUCTION.values[0]
    df_21.loc[i,'CONSTRUCTION_RATE'] = df_construction[(df_construction['YEAR']==year) & (df_construction['MONTH']==month)].CONSTRUCTION_RATE.values[0]
df_21 = df_21[['SOLDDATE', 'CUSTID', 'PRODNAME', 'REGION', 'REGION2', 'TEMP', 'HUM', 'CONSTRUCTION', 'CONSTRUCTION_RATE', 'QUANT']]
df_21

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_21['CONSTRUCTION'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_21['CONSTRUCTION_RATE'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See t

Unnamed: 0,SOLDDATE,CUSTID,PRODNAME,REGION,REGION2,TEMP,HUM,CONSTRUCTION,CONSTRUCTION_RATE,QUANT
0,2021-02-22,2001102,PEMA-580FX,충청남도,금산,6.500000,67.599998,10281640,34.6,15000
1,2021-02-22,2001200,PEMA-580FX,충청남도,태안,6.500000,67.599998,10281640,34.6,20000
2,2021-02-22,2001300,AE,서울,송파,7.800000,58.299999,10281640,34.6,2000
3,2021-02-22,2001400,CSA5000,경상북도,경주,9.600000,48.599998,10281640,34.6,10000
4,2021-02-22,2001500,CSA5000,경상북도,성주,9.600000,48.599998,10281640,34.6,10000
...,...,...,...,...,...,...,...,...,...,...
1714,2021-04-21,2007400,PEMA-SR2000,인천,인천,16.900000,52.900002,12006327,-8.7,10000
1715,2021-04-21,2007500,PEMA-SR2000,부산광역시,부산,18.600000,44.299999,12006327,-8.7,8000
1716,2021-04-21,2007600,PEMA-HR1000,충청북도,청양,20.299999,34.900002,12006327,-8.7,7000
1717,2021-04-21,2007700,PEMA-SN400,경기도남부,안성,17.799999,50.900002,12006327,-8.7,10000


In [106]:
# df_date_21 = get_date(2021,startdate='20210101',enddate='20211231')