In [4]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\neteera\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [5]:
import sklearn

In [6]:
import math
from datetime import datetime

# Haversine function to calculate the distance between two points on the Earth's surface
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance between two points, in kilometres.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lon1: Longitude of the first point, in degrees.
        lat1: Latitude of the first point, in degrees.
        lon2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.

    Returns:
        The distance along the sphere's surface, in kilometres.
    """
    R = 6371  # Earth radius in kilometers
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2) ** 2
    # For (near-)antipodal points floating-point rounding can leave `a` a hair
    # above 1, which would make sqrt(1 - a) raise ValueError. Cap it at 1.
    a = min(1.0, a)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c  # Output distance in kilometers

# User class to store carpooling participant information
class User:
    """A carpooling participant: route endpoints, trip limits and a time window.

    Attributes are stored exactly as passed in, except ``start_time`` and
    ``end_time``, which are parsed from ``'HH:MM'`` strings into ``datetime``
    objects (``strptime`` fills in its default date, since none is given).
    """

    def __init__(self, name, start_lon, start_lat, end_lon, end_lat, distance, travel_time, start_time, end_time):
        clock_format = '%H:%M'  # times arrive as HH:MM strings

        self.name = name

        # Route endpoints as longitude/latitude pairs.
        self.start_lon, self.start_lat = start_lon, start_lat
        self.end_lon, self.end_lat = end_lon, end_lat

        # distance: distance willing to travel from start to end.
        # travel_time: stored as given (unit not stated here; presumably minutes).
        self.distance, self.travel_time = distance, travel_time

        # Raises ValueError if either string is not in HH:MM form.
        self.start_time = datetime.strptime(start_time, clock_format)
        self.end_time = datetime.strptime(end_time, clock_format)

    def __repr__(self):
        return "User({})".format(self.name)

# Matching function to find potential carpool matches
def _minutes_apart(t1, t2):
    """Return the shortest gap in minutes between two clock times, wrapping at midnight."""
    minutes_per_day = 24 * 60
    gap = (abs((t1 - t2).total_seconds()) / 60) % minutes_per_day
    return min(gap, minutes_per_day - gap)


def find_matches(users, distance_threshold=1.0, time_threshold=30):
    """Find every pair of users whose routes and schedules are close enough to carpool.

    Two users match when their start points AND their end points are each
    within ``distance_threshold`` of one another, and their start times AND
    end times are each within ``time_threshold`` of one another.

    Args:
        users: Iterable of objects exposing ``start_lon``, ``start_lat``,
            ``end_lon``, ``end_lat``, ``start_time`` and ``end_time``.
        distance_threshold: Maximum separation, in the unit returned by
            ``haversine`` (kilometres), allowed between the two start points
            and between the two end points.
        time_threshold: Maximum difference in minutes allowed between the two
            start times and between the two end times.

    Returns:
        A list of ``(user1, user2)`` tuples. Each unordered pair appears at
        most once, with ``user1`` the one that comes first in ``users``.
    """
    # Materialise once so generators and other non-sliceable iterables work too.
    users = list(users)
    matches = []

    for i, user1 in enumerate(users):
        # Only look at users after `user1` so each pair is considered once.
        for user2 in users[i + 1:]:
            start_distance = haversine(user1.start_lon, user1.start_lat, user2.start_lon, user2.start_lat)
            end_distance = haversine(user1.end_lon, user1.end_lat, user2.end_lon, user2.end_lat)

            if start_distance > distance_threshold or end_distance > distance_threshold:
                continue

            # The times carry no date, so compare them on a 24-hour circle:
            # 23:50 and 00:10 are 20 minutes apart, not 1420.
            start_time_diff = _minutes_apart(user1.start_time, user2.start_time)
            end_time_diff = _minutes_apart(user1.end_time, user2.end_time)

            if start_time_diff <= time_threshold and end_time_diff <= time_threshold:
                matches.append((user1, user2))

    return matches

# Example usage
# Sample riders. Each tuple lists the User constructor arguments in order:
# name, start lon, start lat, end lon, end lat, distance, travel time, start, end.
users = [
    User(*fields)
    for fields in (
        ("Alice", 100.0, 13.7, 100.2, 13.8, 5, 30, "08:00", "09:00"),
        ("Bob", 100.1, 13.75, 100.25, 13.85, 6, 35, "08:15", "09:15"),
        ("Charlie", 101.0, 14.0, 101.1, 14.1, 8, 45, "07:45", "08:45"),
    )
]

# Thresholds are deliberately loose for this demo (13 km, 60 minutes).
matches = find_matches(users, distance_threshold=13.0, time_threshold=60)

# Report each matched pair by name.
for first, second in matches:
    print(f"Match found: {first.name} and {second.name}")


Match found: Alice and Bob


In [7]:

from sklearn.neighbors import NearestNeighbors
import numpy as np

# Feature vector for each rider, keyed by the rider's name so that the names
# printed below always belong to the row they were computed from (rather than
# being looked up by position in a separately maintained list).
# Format: [start_lon, start_lat, end_lon, end_lat, start_time_minutes, end_time_minutes]
feature_rows = {
    "Alice":   [100.0, 13.7, 100.2, 13.8, 480, 540],
    "Bob":     [100.05, 13.75, 100.25, 13.85, 495, 555],
    "Charlie": [100.08, 13.72, 100.28, 13.82, 510, 570],
}
feature_names = list(feature_rows)
users_features = np.array(list(feature_rows.values()))

# Number of neighbors to request per rider. The query points are the training
# points themselves, so each rider's own row is among the neighbors returned;
# it is skipped when printing.
k = 2

# Initialize the KNN model. Never ask for more neighbors than there are rows.
# NOTE: the columns are not rescaled, so the minute columns (gaps of 15+)
# dominate the degree columns (gaps below 0.1) in the Euclidean distance.
knn = NearestNeighbors(n_neighbors=min(k, len(users_features)), metric='euclidean')

# Fit the model on the data
knn.fit(users_features)

# Find the nearest neighbors for each rider
distances, indices = knn.kneighbors(users_features)

# Print the matches
for i, (row_distances, row_neighbors) in enumerate(zip(distances, indices)):
    for neighbor_distance, j in zip(row_distances, row_neighbors):
        if j != i:  # Skip self-match
            print(f"Match found: {feature_names[i]} and {feature_names[j]} with distance {neighbor_distance:.2f}")


Match found: Alice and Bob with distance 21.21
Match found: Bob and Charlie with distance 21.21
Match found: Charlie and Bob with distance 21.21


In [8]:
from sklearn.cluster import KMeans

# Number of clusters (can be adjusted)
n_clusters = 2

# Initialize the K-Means model.
# random_state pins the centroid initialisation so a re-run reproduces the same
# clusters; n_init is spelled out because its default differs between
# scikit-learn releases.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)

# Fit the model and get one cluster label per row of users_features
clusters = kmeans.fit_predict(users_features)

# Print out users in each cluster (rows are assumed to follow the order of `users`)
for cluster in range(n_clusters):
    print(f"\nCluster {cluster+1}:")
    cluster_members = np.where(clusters == cluster)[0]
    for member_index in cluster_members:
        print(f"User: {users[member_index].name}")


Cluster 1:
User: Bob
User: Charlie

Cluster 2:
User: Alice
