In [4]:
import pandas as pd

df = pd.read_csv('/content/201902-fordgobike-tripdata.csv')

In [5]:
import pandas as pd

# Display basic information about the dataset
print(df.info())

# Display the first few rows
print(df.head())

# Display summary statistics
print(df.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183412 entries, 0 to 183411
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   duration_sec             183412 non-null  int64  
 1   start_time               183412 non-null  object 
 2   end_time                 183412 non-null  object 
 3   start_station_id         183215 non-null  float64
 4   start_station_name       183215 non-null  object 
 5   start_station_latitude   183412 non-null  float64
 6   start_station_longitude  183412 non-null  float64
 7   end_station_id           183215 non-null  float64
 8   end_station_name         183215 non-null  object 
 9   end_station_latitude     183412 non-null  float64
 10  end_station_longitude    183412 non-null  float64
 11  bike_id                  183412 non-null  int64  
 12  user_type                183412 non-null  object 
 13  member_birth_year        175147 non-null  float64
 14  memb

In [7]:
# Check for missing values
print(df.isnull().sum())

# Drop rows with missing start or end station data
df_cleaned = df.dropna(subset=['start_station_name', 'end_station_name', 'start_station_latitude', 'start_station_longitude'])

# Convert 'start_time' and 'end_time' to datetime format
df_cleaned['start_time'] = pd.to_datetime(df_cleaned['start_time'])
df_cleaned['end_time'] = pd.to_datetime(df_cleaned['end_time'])

# Check for duplicates and remove them
df_cleaned = df_cleaned.drop_duplicates()

# Display the cleaned DataFrame information
print(df_cleaned.info())
print(df_cleaned.isnull().sum())

duration_sec                  0
start_time                    0
end_time                      0
start_station_id            197
start_station_name          197
start_station_latitude        0
start_station_longitude       0
end_station_id              197
end_station_name            197
end_station_latitude          0
end_station_longitude         0
bike_id                       0
user_type                     0
member_birth_year          8265
member_gender              8265
bike_share_for_all_trip       0
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['start_time'] = pd.to_datetime(df_cleaned['start_time'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['end_time'] = pd.to_datetime(df_cleaned['end_time'])


<class 'pandas.core.frame.DataFrame'>
Index: 183215 entries, 0 to 183411
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   duration_sec             183215 non-null  int64         
 1   start_time               183215 non-null  datetime64[ns]
 2   end_time                 183215 non-null  datetime64[ns]
 3   start_station_id         183215 non-null  float64       
 4   start_station_name       183215 non-null  object        
 5   start_station_latitude   183215 non-null  float64       
 6   start_station_longitude  183215 non-null  float64       
 7   end_station_id           183215 non-null  float64       
 8   end_station_name         183215 non-null  object        
 9   end_station_latitude     183215 non-null  float64       
 10  end_station_longitude    183215 non-null  float64       
 11  bike_id                  183215 non-null  int64         
 12  user_type            

In [9]:
# Extract the start station details and count the number of rides
start_stations = df_cleaned.groupby(['start_station_id', 'start_station_name', 'start_station_latitude', 'start_station_longitude']).size().reset_index(name='num_rides')

# Rename columns for consistency
start_stations.columns = ['station_id', 'station_name', 'latitude', 'longitude', 'num_rides']

start_stations.head()

Unnamed: 0,station_id,station_name,latitude,longitude,num_rides
0,3.0,Powell St BART Station (Market St at 4th St),37.786375,-122.404904,2760
1,4.0,Cyril Magnin St at Ellis St,37.785881,-122.408915,610
2,5.0,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2327
3,6.0,The Embarcadero at Sansome St,37.80477,-122.403234,2082
4,7.0,Frank H Ogawa Plaza,37.804562,-122.271738,827


In [11]:
# Analyze the bike share data:
most_popular_station = start_stations.loc[start_stations['num_rides'].idxmax()]
least_popular_station = start_stations.loc[start_stations['num_rides'].idxmin()]
average_rides_per_station = start_stations['num_rides'].mean()

print("Most popular station:")
print(most_popular_station)
print("\nLeast popular station:")
print(least_popular_station)
print("\nAverage rides per station:", average_rides_per_station)

Most popular station:
station_id                      58.0
station_name    Market St at 10th St
latitude                   37.776619
longitude                -122.417385
num_rides                       3904
Name: 48, dtype: object

Least popular station:
station_id              344.0
station_name    16th St Depot
latitude            37.766349
longitude         -122.396292
num_rides                   2
Name: 296, dtype: object

Average rides per station: 556.8844984802431


In [23]:
import folium

# Create a Folium map centered around San Francisco
map_center = [37.77, -122.42]
bike_map = folium.Map(location=map_center, zoom_start=12)

In [26]:
def get_marker_color(num_rides):
    if num_rides < 1000:
        return 'green'
    elif num_rides < 5000:
        return 'orange'
    else:
        return 'red'

for _, row in start_stations.iterrows():
    # Get the appropriate marker color based on the number of rides for the current station
    marker_color = get_marker_color(row['num_rides'])

    # Create a folium Marker object with the station's latitude and longitude coordinates
    marker = folium.Marker(
        location=[row['latitude'], row['longitude']],  # Marker location
        icon=folium.Icon(color=marker_color),  # Custom marker color based on the number of rides
        # Set the popup content to display the station name and number of rides
        popup=f"Station: {row['station_name']}<br>Rides: {row['num_rides']}"
    )

    # Add the marker to the bike_map
    marker.add_to(bike_map)

# Save the map to an HTML file
bike_map.save('bike_share_map_with_markers.html')

bike_map