#### Answer 5

In [4]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import folium

# Load the dataset
data = pd.read_csv('rideshare_kaggle.csv')

# Check if the dataset is empty or missing
if data.empty:
    print("Error: Dataset is empty or missing.")
    exit()

# Preprocess the dataset
data = data[['price', 'latitude', 'longitude']]  # Select relevant columns

# Drop rows with missing values
data = data.dropna()

# Perform unsupervised learning for high booking area prediction
kmeans = KMeans(n_clusters=5, random_state=42)
data['booking_area_cluster'] = kmeans.fit_predict(data[['latitude', 'longitude']])

# Perform supervised learning for price prediction
X = data.drop(['price', 'booking_area_cluster'], axis=1)
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
linear_regression = LinearRegression()
linear_regression.fit(X_train, y_train)
y_pred = linear_regression.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Create a map
m = folium.Map(location=[data['latitude'].mean(), data['longitude'].mean()], zoom_start=10)

# Add markers for high booking areas
for index, row in data.iterrows():
    if row['booking_area_cluster'] == 0:
        color = 'blue'
    elif row['booking_area_cluster'] == 1:
        color = 'green'
    elif row['booking_area_cluster'] == 2:
        color = 'red'
    elif row['booking_area_cluster'] == 3:
        color = 'orange'
    else:
        color = 'purple'
    folium.Marker(location=[row['latitude'], row['longitude']], icon=folium.Icon(color=color)).add_to(m)

# Display the map
m.save('booking_areas_map.html')

Mean Squared Error: 87.18083714909837


#### Answer 7

In [7]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from flask import Flask, render_template

# Step 1: Load the dataset
data = pd.read_csv('data.csv')

# Step 2: Preprocess the data (if necessary)

# Remove the 'filename' column
data = data.drop('filename', axis=1)

# Step 3: Apply unsupervised algorithm
X = data.drop('label', axis=1)  # Features
y = data['label']  # Labels (if available)

# Example: Using K-means clustering
k = 5  # Number of clusters
kmeans = KMeans(n_clusters=k)
kmeans.fit(X)

# Step 4: Fit the model

# Step 5: Obtain cluster assignments or reduced-dimensional representations
cluster_labels = kmeans.labels_

# Step 6: Evaluate accuracy or quality of clusters
silhouette_avg = silhouette_score(X, cluster_labels)

print("Silhouette Score:", silhouette_avg)

# Step 7: Display results in the frontend
app = Flask(__name__)

@app.route('/')
def home():
    return render_template('index.html', labels=cluster_labels)

if __name__ == '__main__':
    app.run(debug=True)

Silhouette Score: 0.4589073526363415
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Restarting with windowsapi reloader


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
