# Taxi Fare Clustering Analysis

This notebook applies K-Means clustering to the taxi fare dataset, grouping rides by fare amount and passenger count to identify patterns in the data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

## 1. Load the Data

In [None]:
# Read the raw taxi fare records; the path is relative to the working directory
DATA_PATH = 'taxi_fare_data.csv'
df = pd.read_csv(DATA_PATH)

## 2. Data Preprocessing

In [None]:
# Columns used as clustering inputs
feature_columns = ['fare_amount', 'passenger_count']

# Keep only rows where both inputs are present; surviving rows retain
# their original row labels from df
features = df[feature_columns].dropna()

# Standardize each column (zero mean, unit variance) so both features
# are on a comparable scale
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

## 3. Apply K-Means Clustering

In [None]:
# Apply K-Means with 5 clusters
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(scaled_features)

# `features` had rows with missing values dropped, so there is one label per
# row of `features`, not per row of `df`. Attach the labels through the
# surviving row index so each label lands on its own row; rows that were
# dropped get NaN instead of the assignment raising a length-mismatch
# ValueError.
df['cluster'] = pd.Series(kmeans.labels_, index=features.index)

## 4. Visualize the Clusters

In [None]:
# Scatter of the two clustering features, colored by assigned cluster
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(
    df['fare_amount'],
    df['passenger_count'],
    c=df['cluster'],
    cmap='viridis',
    alpha=0.5,
)
ax.set_title('Taxi Fare Clusters')
ax.set_xlabel('Fare Amount')
ax.set_ylabel('Passenger Count')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()