## 판다스_서울 자전거 데이터 분석

In [35]:
import pandas as pd
df = pd.read_csv('../0_data/SeoulBikeData.csv')
# print(df.head())
df.columns

Index(['Date', 'Rented Bike Count', 'Hour', 'Temperature', 'Humidity(%)',
       'Wind speed (m/s)', 'Visibility (10m)', 'Dew point temperature',
       'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)', 'Seasons',
       'Holiday', 'Functioning Day'],
      dtype='object')

In [None]:
# df['Date'] # 특정 컬럼 1개 추출
df[['Date', 'Rented Bike Count']].head() # 특정 컬럼 2개 이상 추출

- 월 추출 (from Date 컬럼)

In [26]:
df['Date'].head()

0    01/12/2017
1    01/12/2017
2    01/12/2017
3    01/12/2017
4    01/12/2017
Name: Date, dtype: object

In [27]:
# 첫 번째 방법
month_list = []

for date in df['Date']:
    month = date[3:5]
    month_list.append(month)

len(month_list)

8760

In [28]:
df['Month'] = month_list
df.head()

Unnamed: 0,Date,Rented Bike Count,Hour,Temperature,Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature,Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day,Month
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes,12
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12


In [32]:
# 두 번째 방법: 더 간단함
def date_to_month(date):
    month = date[3:5]
    return month

In [33]:
assert date_to_month('01/12/2017') == '12'

In [36]:
df['Month'] = df['Date'].apply(lambda date: date_to_month(date))
df['Month'].head()

0    12
1    12
2    12
3    12
4    12
Name: Month, dtype: object

In [40]:
df['Year'] = df['Date'].apply(lambda date: date[6:])
df['Year'].head()

0    2017
1    2017
2    2017
3    2017
4    2017
Name: Year, dtype: object

- 요일 추출 ex.월

In [69]:
import datetime
date = df['Date'][0]
yyyy = int(date[6:])
mm = int(date[3:5])
dd = int(date[:2])

datetime.date(yyyy, mm, dd).weekday()

4

In [58]:
date

'01/12/2017'

In [43]:
index_to_weekday = {i: day for i, day in enumerate(['월', '화', '수', '목', '금', '토', '일'])}
index_to_weekday

{0: '월', 1: '화', 2: '수', 3: '목', 4: '금', 5: '토', 6: '일'}

In [44]:
index_to_weekday[datetime.date(yyyy, mm, dd).weekday()]

'금'

In [65]:
def date_to_weekday(date):
    index_to_weekday = {i: day for i, day in enumerate(['월', '화', '수', '목', '금', '토', '일'])}
    yyyy = int(date[6:])
    mm = int(date[3:5])
    dd = int(date[:2])
    weekday = index_to_weekday[datetime.date(yyyy, mm, dd).weekday()]
    return weekday

In [82]:
def date_to_weekday(date):
    yyyy = int(date[6:])
    mm = int(date[3:5])
    dd = int(date[:2])
    dateime_obj = datetime.date(yyyy, mm, dd)
    day_int = dateime_obj.weekday()
    return ['월', '화', '수', '목', '금', '토', '일'][day_int]

In [83]:
assert date_to_weekday('02/12/2017') == '토'

In [84]:
df['Weekday'] = df['Date'].apply(date_to_weekday)

In [85]:
df['Weekday'][72]

'월'

In [86]:
df.columns

Index(['Date', 'Rented Bike Count', 'Hour', 'Temperature', 'Humidity(%)',
       'Wind speed (m/s)', 'Visibility (10m)', 'Dew point temperature',
       'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)', 'Seasons',
       'Holiday', 'Functioning Day', 'Month', 'Year', 'weekday', 'Weekday'],
      dtype='object')

In [87]:
df[['Year', 'Month', 'Weekday']].head()

Unnamed: 0,Year,Month,Weekday
0,2017,12,금
1,2017,12,금
2,2017,12,금
3,2017,12,금
4,2017,12,금


In [90]:
# groupby: 데이터프레임.groupby(컬럼명1).통계()[컬럼명2]
# 통계함수: min, median(중앙값), sum, max, mean
# df.groupby('Month').mean()
df.groupby('Month').mean()['Rented Bike Count']

  df.groupby('Month').mean()['Rented Bike Count']


Month
01     201.620968
02     225.941964
03     511.551075
04     728.093056
05     950.387097
06    1245.676389
07     987.177419
08     876.192204
09     935.572222
10     874.563172
11     646.826389
12     249.099462
Name: Rented Bike Count, dtype: float64

- 데이터 저장

In [93]:
'Month' in df.columns

True

In [94]:
assert 'Month' in df.columns
assert 'Weekday' in df.columns