In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Make sure the input/output folders exist before anything is read or written.
for folder in ('../database', '../visualizations'):
    os.makedirs(folder, exist_ok=True)

# Load the raw trip records.
df = pd.read_csv('../database/uber-raw-data-janjune-15.csv')

# Parse the raw 'Pickup_date' strings once; every derived column below is
# computed from this parsed series.
pickup_ts = pd.to_datetime(df['Pickup_date'])

# Replace the combined timestamp column with separate calendar date,
# time-of-day and weekday-name columns.
df = df.drop(columns=['Pickup_date']).assign(
    pickup_date=pickup_ts.dt.date,
    pickup_time=pickup_ts.dt.time,
    pickup_day_of_week=pickup_ts.dt.day_name(),
)

# Keep only the columns of interest, in the order used for the exported file.
column_order = [
    'Dispatching_base_num',
    'Affiliated_base_num',
    'locationID',
    'pickup_date',
    'pickup_time',
    'pickup_day_of_week',
]
df = df[column_order]

# Create visualizations
# Bar chart of trip counts per weekday, saved as a PNG.
#
# value_counts() returns counts sorted by descending frequency, which would
# put the weekdays on the x-axis in an arbitrary-looking order. Reindex to
# calendar order (Monday -> Sunday) so the chart reads as a weekly profile.
# The labels match the English names produced by Series.dt.day_name();
# a weekday with no trips is shown as a zero-height bar instead of vanishing.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
trips_per_weekday = (
    df['pickup_day_of_week']
    .value_counts()
    .reindex(weekday_order, fill_value=0)
)

plt.figure(figsize=(12, 6))
trips_per_weekday.plot(kind='bar', color='skyblue')
plt.title('Distribution of Uber Trips by Day of Week')
plt.xlabel('Day of Week')
plt.ylabel('Number of Trips')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('../visualizations/trips_by_day_of_week.png', dpi=300, bbox_inches='tight')
# Close the figure so it does not linger in memory or bleed into later plots.
plt.close()

# Bar chart of the ten most frequent pickup location IDs, saved as a PNG.
# value_counts() sorts by descending frequency, so head(10) is the top ten.
busiest_locations = df['locationID'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(12, 6))
busiest_locations.plot(kind='bar', color='coral', ax=ax)
ax.set_title('Top 10 Pickup Locations')
ax.set_xlabel('Location ID')
ax.set_ylabel('Number of Trips')
fig.tight_layout()
fig.savefig('../visualizations/top_pickup_locations.png', dpi=300, bbox_inches='tight')
# Release the figure now that it has been written to disk.
plt.close(fig)

# Write the processed frame to CSV, leaving out the row index.
df.to_csv(path_or_buf='../database/uber_trips_processed.csv', index=False)

# Report both outputs, one message per line.
print(
    "✅ 'uber_trips_processed.csv' has been successfully saved to database/",
    "✅ Visualizations saved to visualizations/",
    sep='\n',
)


✅ 'uber_trips_processed.csv' has been successfully saved.
