# 3차시: 대전광역시 공영자전거 (타슈) 데이터 기초 분석

## 2023. 05. 24. 14:10 ~ 16:00 (50분×2)
1. 데이터 분석 흐름
1. 원본 데이터 수집
1. 데이터 전처리
1. 데이터 분석

### 참고자료
- [파이썬 3 표준 문서](https://docs.python.org/3/index.html)
- [대전광역시 시설관리공단](https://www.djsiseol.or.kr/)
- [공공데이터포털](https://www.data.go.kr/)
- [기상청 기상자료개방포털](https://data.kma.go.kr/)

In [None]:
import datetime

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the 2021 rental history and parse both timestamp columns, which are
# stored in compact YYYYMMDDHHMMSS form, into pandas datetimes.
df_rent = pd.read_pickle('tashu_dataset-rental_history_2021.pkl')
df_rent = df_rent.assign(**{
    column: pd.to_datetime(df_rent[column], format='%Y%m%d%H%M%S')
    for column in ('대여일시', '반납일시')
})
# Bare last expression so the notebook renders the resulting table.
df_rent

In [None]:
# Display the dtype of every column of the rental table (useful for checking
# that the two timestamp columns converted earlier are now datetimes).
df_rent.dtypes

In [None]:
# Load the station information table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
df_station = pd.read_pickle('tashu_dataset-station_information.pkl')
# Bare last expression so the notebook renders the table.
df_station

In [None]:
# Display the dtype of every column of the station table.
df_station.dtypes

## 대여일시 분석

In [None]:
# Wrap the rental timestamps in a DatetimeIndex; the cells below derive their
# grouping keys from it (.hour and .strftime(...)).
rent_index = pd.DatetimeIndex(df_rent['대여일시'])

In [None]:
# Group the rental timestamps by hour of day. Despite the name,
# hourly_df_rent is a groupby object, not a DataFrame.
hourly_df_rent = df_rent['대여일시'].groupby(by=rent_index.hour)
# Number of rentals in each hour group (displayed by the notebook).
hourly_df_rent.count()

In [None]:
# Line chart of the rental count per hour of day on an 8x8-inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
# Kept as the bare last expression so the cell output is unchanged.
ax.plot(hourly_df_rent.count())

In [None]:
# Report the hour of day with the most rentals.
# idxmax() returns the first index label holding the maximum count, which is
# the same label the earlier "mask by == max(...) then take [0]" lookup
# produced, but the group counts are now computed once instead of three times.
print(f'가장 많이 대여하는 시간: {hourly_df_rent.count().idxmax()}')

In [None]:
# Group the rental timestamps by calendar date; the group keys are
# 'YYYY-MM-DD' strings produced by strftime, not datetime objects.
daily_df_rent = df_rent['대여일시'].groupby(by=rent_index.strftime('%Y-%m-%d'))
# Number of rentals on each date (displayed by the notebook).
daily_df_rent.count()

In [None]:
# Line chart of the rental count per calendar date on an 8x8-inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
# Kept as the bare last expression so the cell output is unchanged.
ax.plot(daily_df_rent.count())

In [None]:
# Report the calendar date with the most rentals.
# idxmax() yields the first label with the maximum count (same result as the
# previous mask-and-[0] lookup) while computing the group counts only once.
print(f'가장 많이 대여한 날: {daily_df_rent.count().idxmax()}')

In [None]:
# Group the rental timestamps by month; the group keys are 'YYYY-MM' strings.
monthly_df_rent = df_rent['대여일시'].groupby(by=rent_index.strftime('%Y-%m'))
# Number of rentals in each month (displayed by the notebook).
monthly_df_rent.count()

In [None]:
# Line chart of the rental count per month on an 8x8-inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
# Kept as the bare last expression so the cell output is unchanged.
ax.plot(monthly_df_rent.count())

In [None]:
# Report the month with the most rentals.
# The label previously read "날" (day) even though the value is a 'YYYY-MM'
# month key; it now reads "달" (month). idxmax() returns the first label with
# the maximum count and avoids recomputing the group counts three times.
print(f'가장 많이 대여한 달: {monthly_df_rent.count().idxmax()}')

In [None]:
# https://pandas.pydata.org/docs/reference/api/pandas.Series.dt.strftime.html
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
# Group the rental timestamps by day of week. '%w' formats the weekday as a
# decimal digit with 0 meaning Sunday, so the group keys are the strings
# '0'..'6' rather than integers.
weekly_df_rent = df_rent['대여일시'].groupby(by=rent_index.strftime('%w'))
# Number of rentals on each weekday (displayed by the notebook).
weekly_df_rent.count()

In [None]:
# Weekday number -> English name, numbered the way strftime('%w') numbers
# days (0 = Sunday).
labels = dict(enumerate((
    'Sunday', 'Monday', 'Tuesday', 'Wednesday',
    'Thursday', 'Friday', 'Saturday',
)))
# One matplotlib colour code per weekday bar.
colors = ['r', 'g', 'b', 'k', 'y', 'm', 'c']

fig, ax = plt.subplots(figsize=(6, 6))
# Bar heights are matched to positions purely by order, so this relies on
# weekly_df_rent.count() containing all seven weekdays in ascending key order.
ax.bar(list(labels), weekly_df_rent.count(),
       color=colors, tick_label=list(labels.values()))

In [None]:
# Report the day of the week with the most rentals.
# The weekly groups are keyed by '%w' digit strings ('0' = Sunday), so the
# winning key is converted to int and translated through `labels` (defined in
# the bar-chart cell) to print a weekday name instead of a bare digit. The
# label now reads "요일" (day of week) rather than "날" (day).
# idxmax() returns the first key with the maximum count and avoids computing
# the group counts three times.
print(f'가장 많이 대여한 요일: {labels[int(weekly_df_rent.count().idxmax())]}')

## 정류장 분석

In [None]:
# Group the rental-station column by its own values so that count() gives the
# number of rentals that started at each station.
station_df_rent = df_rent['대여스테이션'].groupby(by=df_rent['대여스테이션'])
# Rank stations from most to fewest rentals.
sorted_rent_station = station_df_rent.count().sort_values(ascending=False)
# Bare last expression so the notebook displays the ranking.
sorted_rent_station

In [None]:
# Show the station-table row(s) whose index equals an entry of the ranking.
# NOTE(review): index[1] selects the second-ranked rental station, not the
# top one (index[0]); confirm that this offset is intentional.
df_station.loc[df_station.index == sorted_rent_station.index[1]]

In [None]:
# Split the rentals by whether the bike was returned to the station it was
# rented from: diff_rent holds trips ending elsewhere, same_rent holds trips
# ending at the starting station.
diff_rent = df_rent.loc[df_rent['대여스테이션'].ne(df_rent['반납스테이션'])]
same_rent = df_rent.loc[df_rent['대여스테이션'].eq(df_rent['반납스테이션'])]

In [None]:
# Among rentals returned to a different station, count rentals per starting
# station.
t = diff_rent['대여스테이션'].groupby(by=diff_rent['대여스테이션'])
# Rank starting stations from most to fewest such rentals.
sorted_t = t.count().sort_values(ascending=False)
# Bare last expression so the notebook displays the ranking.
sorted_t

In [None]:
# Show the station-table row(s) for the top-ranked entry of sorted_t.
df_station.loc[df_station.index == sorted_t.index[0]]

In [None]:
# Among rentals returned to the same station they started from, count rentals
# per station. Note that this rebinds t and sorted_t from the previous cells.
t = same_rent['대여스테이션'].groupby(by=same_rent['대여스테이션'])
# Rank stations from most to fewest such rentals.
sorted_t = t.count().sort_values(ascending=False)
# Bare last expression so the notebook displays the ranking.
sorted_t

## 대여 - 반납 분석

In [None]:
# Group all rentals by (rental station, return station) pair. Despite its
# name, df is a groupby object here, not a DataFrame.
df = df_rent['대여일시'].groupby(by=[df_rent['대여스테이션'], df_rent['반납스테이션']])
# Number of rentals for each station pair (displayed by the notebook).
df.count()

In [None]:
# Find the (rental station, return station) pair(s) with the most rentals.
# The pair counts are computed once and reused inside the lambda instead of
# running the grouped count three times. Every pair tied for the maximum is
# kept, as before. Series.max() replaces the builtin max(), so an empty
# result gives an empty index instead of raising ValueError.
df.count().pipe(lambda counts: counts.index[counts == counts.max()])

In [None]:
# Group only the rentals returned to a different station by
# (rental station, return station) pair. This rebinds df, which is a groupby
# object rather than a DataFrame.
df = diff_rent['대여일시'].groupby(by=[diff_rent['대여스테이션'], diff_rent['반납스테이션']])
# Number of rentals for each station pair (displayed by the notebook).
df.count()

In [None]:
# Find the most frequent (rental station, return station) pair(s) among the
# groups currently held in df.
# The pair counts are computed once and reused inside the lambda instead of
# running the grouped count three times. Every pair tied for the maximum is
# kept, as before. Series.max() replaces the builtin max(), so an empty
# result gives an empty index instead of raising ValueError.
df.count().pipe(lambda counts: counts.index[counts == counts.max()])