In [None]:
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import pandas as pd
import glob

# Read every file in the CSVs folder and stack them into a single frame.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
CSV_GLOB = "C:/Users/kimba/OneDrive/Desktop/Cyclistic Analysis/CSVs/*"

# glob returns matches in arbitrary (OS-dependent) order; sorting makes the row
# order of `df` reproducible from run to run.
csv_paths = sorted(glob.glob(CSV_GLOB))
if not csv_paths:
    # Fail with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(f"No input files matched {CSV_GLOB!r}")

csv_list = [pd.read_csv(path, index_col=None, header=0) for path in csv_paths]

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
df = pd.concat(csv_list, axis=0, ignore_index=True)

In [None]:
# Preview the first rows of the combined frame.
df.head()

In [None]:
# Show each column's dtype together with the frame's (rows, columns) shape.
df.dtypes, df.shape

In [None]:
# Parse the start/end timestamp columns into datetime64 values.
# pd.to_datetime is vectorised, turns missing values into NaT instead of
# raising, and leaves an already-parsed column untouched, so this cell can be
# re-run safely (datetime.strptime only accepts strings).
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
df['started_at'] = pd.to_datetime(df['started_at'], format=TIMESTAMP_FORMAT)
df['ended_at'] = pd.to_datetime(df['ended_at'], format=TIMESTAMP_FORMAT)


In [None]:
# Ride duration in minutes: end minus start as a timedelta, converted to
# seconds with the vectorised .dt accessor (no per-row Python call), then / 60.
df['ride_length'] = (df['ended_at'] - df['started_at']).dt.total_seconds() / 60

In [None]:
# Average ride length (minutes) for each value of `member_casual`.
df.groupby('member_casual')['ride_length'].mean()

In [None]:
# Work on just the two columns needed to compare ride length across
# membership status.
ride_length_by_member = df[['ride_length', 'member_casual']]
ride_length_by_member

In [None]:
# Box plot of ride length, one box per `member_casual` value.
sns.boxplot(data=ride_length_by_member, y='ride_length', x='member_casual')

In [None]:
# Summary statistics of ride length for each `member_casual` value.
ride_length_by_member.groupby('member_casual').describe()

In [None]:
# Keep only the rows whose ride length is zero or positive, then redraw the
# box plot on the filtered data.
rl_by_mem_clean = ride_length_by_member.loc[ride_length_by_member['ride_length'].ge(0)]

sns.boxplot(data=rl_by_mem_clean, y='ride_length', x='member_casual')

In [None]:
# Summary statistics per `member_casual` value after dropping negative ride lengths.
rl_by_mem_clean.groupby('member_casual').describe()

In [None]:
# Weekday of each ride's start as an integer (Monday=0 ... Sunday=6), taken
# with the vectorised .dt accessor instead of a per-row Python call.
df['day_of_week'] = df['started_at'].dt.weekday

In [None]:
# Ride counts with weekdays as rows and `member_casual` values as columns.
day_of_week_by_member = pd.crosstab(index=df['day_of_week'], columns=df['member_casual'])

# The same table with each column scaled so that it sums to 1.
day_of_week_by_member_norm = pd.crosstab(
    index=df['day_of_week'],
    columns=df['member_casual'],
    normalize='columns',
)
(day_of_week_by_member, day_of_week_by_member_norm)

In [None]:
# Line chart of each membership status' share of rides across the week.
# plt.plot(data=...) on its own draws nothing because no x/y series are named,
# so plot every column of the normalised table explicitly against its index.
for membership_status in day_of_week_by_member_norm.columns:
    plt.plot(
        day_of_week_by_member_norm.index,
        day_of_week_by_member_norm[membership_status],
        label=membership_status,
    )
plt.legend(title="Membership Status")
plt.xlabel("Day of Week")
plt.ylabel("Proportion of Total Riders")

In [None]:
# `a` plots the raw counts per weekday, `b` the column-normalised proportions.
a, b = day_of_week_by_member.plot(legend=True), day_of_week_by_member_norm.plot(legend=True)
b.set_xlabel("Day of Week")
b.set_ylabel("Proportion of Riders")
b.legend(title="Membership Status")

# Pin one tick per weekday value (0..6) before labelling. Relabelling the
# automatically chosen ticks depends on where matplotlib happens to put them
# and can misalign the names or fail when the tick count differs.
weekday_labels = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
b.set_xticks(list(range(len(weekday_labels))))
b.set_xticklabels(weekday_labels)
b

In [None]:
# Number of rides that started on each weekday value 0..6.
count = {day: int((df['day_of_week'] == day).sum()) for day in range(7)}

count

In [None]:
def _interleave(first, second):
    """Return a list that alternates values: first[0], second[0], first[1], second[1], ..."""
    merged = [None] * (len(first) * 2)
    merged[::2] = first
    merged[1::2] = second
    return merged


# Every ride contributes two consecutive rows: its start station (even
# positions) followed by its end station (odd positions). The membership
# status is repeated for both rows.
member_casual_dbl_count = _interleave(df['member_casual'], df['member_casual'])
stations = _interleave(df['start_station_name'], df['end_station_name'])
lat_coords = _interleave(df['start_lat'], df['end_lat'])
lng_coords = _interleave(df['start_lng'], df['end_lng'])

station_interactions = pd.DataFrame(
    data={
        'member_casual': member_casual_dbl_count,
        'station': stations,
        'latitude': lat_coords,
        'longitude': lng_coords,
    }
)
station_interactions.head()

In [None]:
# Distinct, non-missing station names that appear as a ride's start station.
# NOTE(review): names that only ever appear as an end station are not included
# here - confirm that is intended.
unique_stations = df['start_station_name'].dropna().drop_duplicates()

# Take the first row recorded for every station in a single pass, rather than
# re-scanning the whole interactions table twice per station.
first_row_per_station = (
    station_interactions
    .dropna(subset=['station'])
    .drop_duplicates(subset='station', keep='first')
    .set_index('station')
)

# Map each station name to the [latitude, longitude] of its first row.
station_coords = {
    station: [
        first_row_per_station.at[station, 'latitude'],
        first_row_per_station.at[station, 'longitude'],
    ]
    for station in unique_stations
}

len(station_coords)

In [None]:
# Display the station name -> [latitude, longitude] mapping.
station_coords

In [None]:
# Overwrite the coordinates of every row whose station is in `station_coords`
# with that station's single [latitude, longitude] pair, so each station has
# one consistent location. A vectorised map replaces the per-station loop,
# which scanned the whole table twice for every station.
lat_by_station = pd.Series(
    {name: pair[0] for name, pair in station_coords.items()}, dtype='float64'
)
lng_by_station = pd.Series(
    {name: pair[1] for name, pair in station_coords.items()}, dtype='float64'
)

# Rows whose station is not a key of `station_coords` (including missing
# names) keep their original coordinates.
has_known_station = station_interactions['station'].isin(lat_by_station.index)
known_station_names = station_interactions.loc[has_known_station, 'station']
station_interactions.loc[has_known_station, 'latitude'] = known_station_names.map(lat_by_station)
station_interactions.loc[has_known_station, 'longitude'] = known_station_names.map(lng_by_station)

In [None]:
# Show the rows recorded for one station.
station_interactions.loc[station_interactions['station'].eq('Franklin St & Illinois St')]

In [None]:
# Uncomment the next line to write the station interactions table to a CSV file.
# station_interactions.to_csv("cyclistic_station_interactions.csv")