In [1]:
pip install scikit-learn




You should consider upgrading via the 'c:\Users\neteera\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [2]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\neteera\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [26]:
import math
from datetime import datetime
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Haversine function to calculate the distance between two points on the Earth's surface
def haversine(lon1, lat1, lon2, lat2):
    R = 6371  # Earth radius in kilometers
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)
    
    a = math.sin(delta_phi/2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    
    return R * c  # Output is the shortest distance between 2 points (in kilometers)

# User class to store carpooling participant information
class User:
    def __init__(self, user_id, name, car_model, fuel_efficiency, start_lon, start_lat, end_lon, end_lat, start_point_name, end_point_name, distance, travel_time, start_time, end_time):
        self.user_id = user_id
        self.name = name
        self.car_model = car_model  # New attribute for car model
        self.fuel_efficiency = fuel_efficiency 
        self.start_lon = start_lon
        self.start_lat = start_lat
        self.end_lon = end_lon
        self.end_lat = end_lat
        self.start_point_name = start_point_name
        self.end_point_name = end_point_name
        self.distance = distance  # distance willing to travel from start to end
        self.travel_time = travel_time
        self.start_time = datetime.strptime(start_time, '%H:%M')  # Start time as HH:MM
        self.end_time = datetime.strptime(end_time, '%H:%M')  # End time as HH:MM

    def __repr__(self):
        return f"User({self.name})"

def calculate_fuel_cost(distance, fuel_efficiency, fuel_price_per_liter=40):
    """
    Calculate the fuel cost based on distance, fuel efficiency, and fuel price per liter.
    """
    liters_needed = distance / fuel_efficiency
    return liters_needed * fuel_price_per_liter

# Convert user data to a feature matrix for KNN
def create_user_features(users):
    return np.array([
        [user.start_lon, user.start_lat, user.end_lon, user.end_lat, 
         user.start_time.hour * 60 + user.start_time.minute, 
         user.end_time.hour * 60 + user.end_time.minute]
        for user in users
    ])

# Matching function using KNN to find potential carpool matches
def find_matches(users, k=2, distance_threshold=1.0, time_threshold=30):
    # Create the feature vectors for each user
    users_features = create_user_features(users)

    # Initialize the KNN model
    knn = NearestNeighbors(n_neighbors=k, metric='euclidean')

    # Fit the model on the data
    knn.fit(users_features)

    # Find the nearest neighbors for each user
    distances, indices = knn.kneighbors(users_features)

    matches = []
    for i, (distance, neighbor_index) in enumerate(zip(distances, indices)):
        user1 = users[i]
        for j, index in enumerate(neighbor_index):
            if i != index:  # Skip self-match
                user2 = users[index]

                # Further filter the matches based on haversine distance and time overlap
                start_distance = haversine(user1.start_lon, user1.start_lat, user2.start_lon, user2.start_lat)
                end_distance = haversine(user1.end_lon, user1.end_lat, user2.end_lon, user2.end_lat)
                
                if start_distance <= distance_threshold and end_distance <= distance_threshold:
                    start_time_diff = abs((user1.start_time - user2.start_time).total_seconds() / 60)
                    end_time_diff = abs((user1.end_time - user2.end_time).total_seconds() / 60)
                    
                    if start_time_diff <= time_threshold and end_time_diff <= time_threshold:
                        match_time = (max(user1.start_time, user2.start_time), min(user1.end_time, user2.end_time))
                        matches.append({
                            'user_id_person1': user1.user_id,
                            'person1_name': user1.name,
                            'user_id_person2': user2.user_id,
                            'person2_name': user2.name,
                            'start_latitude': user1.start_lat,
                            'start_longitude': user1.start_lon,
                            'start_point_name': user1.start_point_name,
                            'end_latitude': user1.end_lat,
                            'end_longitude': user1.end_lon,
                            'end_point_name': user1.end_point_name,
                            'start_time': user1.start_time.strftime('%H:%M'),
                            'end_time': user1.end_time.strftime('%H:%M'),
                            'match_time': f"{match_time[0].strftime('%H:%M')} - {match_time[1].strftime('%H:%M')}",
                            'distance': distance[j]  # Adding the distance for reference
                        })
    
    return pd.DataFrame(matches)

# Function to create a cost table based on matches and fuel efficiency
def create_cost_table(users, matches_df, fuel_price_per_liter=40):
    cost_data = []
    
    for _, row in matches_df.iterrows():
        user1 = next(user for user in users if user.user_id == row['user_id_person1'])
        user2 = next(user for user in users if user.user_id == row['user_id_person2'])
        
        distance = user1.distance
        cost1 = calculate_fuel_cost(distance, user1.fuel_efficiency, fuel_price_per_liter)
        cost2 = calculate_fuel_cost(distance, user2.fuel_efficiency, fuel_price_per_liter)
        
        cost_data.append({
            'user_id_person1': user1.user_id,
            'person1_name': user1.name,
            'car_model_person1': user1.car_model,
            'fuel_efficiency_person1': user1.fuel_efficiency,
            'fuel_cost_person1': cost1,
            'user_id_person2': user2.user_id,
            'person2_name': user2.name,
            'car_model_person2': user2.car_model,
            'fuel_efficiency_person2': user2.fuel_efficiency,
            'fuel_cost_person2': cost2,
            'total_cost': cost1 + cost2
        })
    
    return pd.DataFrame(cost_data)

# Example usage
users = [
    User(1, "Alice", "Sedan", 15, 100.0, 13.7, 100.2, 13.8, "Start A", "End B", 5, 30, "08:00", "09:00"),
    User(2, "Bob", "SUV", 10, 100.1, 13.75, 100.25, 13.85, "Start B", "End B", 6, 35, "08:15", "09:15"),
    User(3, "Charlie", "Hatchback", 18, 103, 14.0, 101.1, 12.1, "Start C", "End C", 5, 45, "07:45", "08:45"),
    User(4, "Sali", "Toyota", 12, 104, 12.2, 103, 13.1, "Start D", "End D", 7, 37, "08:00", "08:37"),
    User(5, "Jame", "BMW", 12.1, 104, 12.0, 103, 13.1, "Start E", "End D", 7, 33, "08:30", "08:33"),
]

# Use the KNN-based matching function
# def find_matches(users, distance_threshold=1.0, time_threshold=30):
matches_df = find_matches(users, k=2, distance_threshold=30.0, time_threshold=60)
#def calculate_fuel_cost(distance, fuel_efficiency, fuel_price_per_liter=40):
cost_df = create_cost_table(users, matches_df, fuel_price_per_liter=40)

# Display the resulting DataFrames
print(cost_df)
print(matches_df)


   user_id_person1 person1_name car_model_person1  fuel_efficiency_person1  \
0                1        Alice             Sedan                    15.00   
1                2          Bob               SUV                    10.00   
2                5         Jame               BMW                    12.10   

   fuel_cost_person1  user_id_person2 person2_name car_model_person2  \
0              13.33                2          Bob               SUV   
1              24.00                1        Alice             Sedan   
2              23.14                4         Sali            Toyota   

   fuel_efficiency_person2  fuel_cost_person2  total_cost  
0                       10              20.00       33.33  
1                       15              16.00       40.00  
2                       12              23.33       46.47  
   user_id_person1 person1_name  user_id_person2 person2_name  start_latitude  \
0                1        Alice                2          Bob           13.70

In [25]:
import pandas as pd
from IPython.display import display

# Assuming cost_df and matches_df are your DataFrames
# Merging the two DataFrames on 'user_id_person1' and 'user_id_person2'

combined_df = pd.merge(matches_df, cost_df, on=['user_id_person1', 'user_id_person2'])

# Optionally, set display options for better formatting in the notebook
pd.set_option('display.float_format', '{:.2f}'.format)

# Display the combined DataFrame as a table
display(combined_df)


Unnamed: 0,user_id_person1,person1_name_x,user_id_person2,person2_name_x,start_latitude,start_longitude,start_point_name,end_latitude,end_longitude,end_point_name,...,distance,person1_name_y,car_model_person1,fuel_efficiency_person1,fuel_cost_person1,person2_name_y,car_model_person2,fuel_efficiency_person2,fuel_cost_person2,total_cost
0,1,Alice,2,Bob,13.7,100.0,Start A,13.8,100.2,End B,...,21.21,Alice,Sedan,15.0,13.33,Bob,SUV,10,20.0,33.33
1,2,Bob,1,Alice,13.75,100.1,Start B,13.85,100.25,End B,...,21.21,Bob,SUV,10.0,24.0,Alice,Sedan,15,16.0,40.0
2,5,Jame,4,Sali,12.0,104.0,Start E,13.1,103.0,End D,...,30.27,Jame,BMW,12.1,23.14,Sali,Toyota,12,23.33,46.47
