Skip to content

sheygarga/Pyber

Repository files navigation

# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
#Read in data files
city_data = pd.read_csv("Pyber/raw_data/city_data.csv")
ride_data = pd.read_csv("Pyber/raw_data/ride_data.csv")
city_data.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city driver_count type
0 Kelseyland 63 Urban
1 Nguyenbury 8 Urban
2 East Douglas 12 Urban
3 West Dawnfurt 34 Urban
4 Rodriguezburgh 52 Urban
ride_data.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city date fare ride_id
0 Sarabury 2016-01-16 13:49:27 38.35 5403689035038
1 South Roy 2016-01-02 18:42:34 17.49 4036272335942
2 Wiseborough 2016-01-21 17:35:29 44.18 3645042422587
3 Spencertown 2016-07-31 14:53:22 6.87 2242596575892
4 Nguyenbury 2016-07-09 04:42:44 6.28 1543057793673
#merges imported data to have 1 final data table to work off of
final_data = pd.merge(ride_data,city_data, how = 'left', on = 'city')
final_data.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city date fare ride_id driver_count type
0 Sarabury 2016-01-16 13:49:27 38.35 5403689035038 46 Urban
1 South Roy 2016-01-02 18:42:34 17.49 4036272335942 35 Urban
2 Wiseborough 2016-01-21 17:35:29 44.18 3645042422587 55 Urban
3 Spencertown 2016-07-31 14:53:22 6.87 2242596575892 68 Urban
4 Nguyenbury 2016-07-09 04:42:44 6.28 1543057793673 8 Urban
#Creates the pie plot for the % of total Rides by City Type
#uses value_counts of "type" from the final_data dataframe


#total = final_data['type'].count()
type_plot = plt.pie(final_data['type'].value_counts(), explode = [0.10,0, 0], labels = final_data['type'].unique(),
                    autopct='%.2f',colors = ['LightCoral', 'LightSkyBlue', 'Gold'],shadow = True, startangle = 240)
plt.title('% of Total Rides by City Type')
plt.savefig('rides_city_type.png')
plt.show()
#total

png

#Creates pie plot of % of total fare by city type
#calculates sum of all the fares, lists of sums for each city type
#the for loop calculates the average for each sum and converts it to a percentage



fare_sum = final_data['fare'].sum()
sum_list = []
temp = final_data.loc[final_data['type'] == 'Urban']
urban_sum = temp['fare'].sum()
sum_list.append(urban_sum)
temp = final_data.loc[final_data['type'] == 'Suburban']
sub_sum = temp['fare'].sum()
sum_list.append(sub_sum)
temp = final_data.loc[final_data['type'] == 'Rural']
rural_sum = temp['fare'].sum()
sum_list.append(rural_sum)
i = 0
for item in sum_list:
    sum_list[i] = (sum_list[i]/fare_sum)*100
    i = i+1
fare_type_plot = plt.pie(sum_list, explode = [0.05, 0 , 0], labels = final_data['type'].unique(),autopct = '%.2f'
                        , colors = ['LightCoral', 'LightSkyBlue', 'Gold'],shadow = True, startangle = 240)
plt.title('% of Total Fares by City Type')
plt.savefig('fares_city_type.png')
plt.show()

png

#creates pie plot of % of total drivers by city type
#same process as above but uses driver_count rather than fare



driver_sum = final_data['driver_count'].sum()
sum_list = []
temp = final_data.loc[final_data['type'] == 'Urban']
urban_sum = temp['driver_count'].sum()
sum_list.append(urban_sum)
temp = final_data.loc[final_data['type'] == 'Suburban']
sub_sum = temp['driver_count'].sum()
sum_list.append(sub_sum)
temp = final_data.loc[final_data['type'] == 'Rural']
rural_sum = temp['driver_count'].sum()
sum_list.append(rural_sum)
i = 0
for item in sum_list:
    sum_list[i] = (sum_list[i]/fare_sum)*100
    i = i+1
fare_type_plot = plt.pie(sum_list, explode = [0.20, 0 , 0], labels = final_data['type'].unique(),autopct = '%.2f'
                         ,colors = ['LightCoral', 'LightSkyBlue', 'Gold'],shadow = True, startangle = 210)
plt.title('% of Total Drivers by City Type')
plt.savefig('drivers_city_type.png')
plt.show()

png

final_data.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city date fare ride_id driver_count type
0 Sarabury 2016-01-16 13:49:27 38.35 5403689035038 46 Urban
1 South Roy 2016-01-02 18:42:34 17.49 4036272335942 35 Urban
2 Wiseborough 2016-01-21 17:35:29 44.18 3645042422587 55 Urban
3 Spencertown 2016-07-31 14:53:22 6.87 2242596575892 68 Urban
4 Nguyenbury 2016-07-09 04:42:44 6.28 1543057793673 8 Urban
#creates bubble chart for the ride sharing data
#first 3 sections are for getting necessary data and separating them by type
#size is stored as _size, ride count is stored as rides_city_ using value_counts()
#for loops are mkade to get the average fare for each city




urban_data = final_data[final_data['type']=='Urban']
rural_data = final_data[final_data['type']=='Rural']
sub_data = final_data[final_data['type']=='Suburban']

urban_size = (urban_data['driver_count'])
rural_size = (rural_data['driver_count'])
sub_size = (sub_data['driver_count'])

rides_city_urban = urban_data['city'].value_counts()
rides_city_rural = rural_data['city'].value_counts()
rides_city_sub = sub_data['city'].value_counts()


urban_temp = []
urban_temp_avg = 0
for city in urban_data['city'].unique():
    temp_df = urban_data[urban_data['city']==city]
    urban_temp_avg = temp_df['fare'].mean()
    urban_temp.append(urban_temp_avg)

rural_temp = []
rural_temp_avg = 0
for city in rural_data['city'].unique():
    temp_df = rural_data[rural_data['city']==city]
    rural_temp_avg = temp_df['fare'].mean()
    rural_temp.append(rural_temp_avg) 
    
sub_temp = []
sub_temp_avg = 0
for city in sub_data['city'].unique():
    temp_df = sub_data[sub_data['city']==city]
    sub_temp_avg = temp_df['fare'].mean()
    sub_temp.append(sub_temp_avg)

#each city type plot is created separately    
#titles and legend are added after it is created    
    
    
urban_plot = plt.scatter(rides_city_urban,urban_temp, s = urban_size, c = 'lightcoral')
rural_plot = plt.scatter(rides_city_rural,rural_temp, s = rural_size, c = 'gold')
sub_plot = plt.scatter(rides_city_sub, sub_temp, s = sub_size, c = 'lightskyblue')
plt.style.use('seaborn-dark')
plt.title('Pyber Ride Sharing Data (2016)')
plt.xlabel('Total Number of Rides (Per City)')
plt.ylabel('Average Fare ($)')
plt.legend(['Urban','Rural','Suburban'],title = 'City Types')
plt.savefig('ride_sharing_data.png')
plt.show()
#print(len(rides_city_urban),len(urban_temp))
#len(urban_data['city'].unique())


#final_data['type'].value_counts()
#urban_data
#rural_data
#sub_data

png

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published