In [2]:
from faker import Faker
import csv
import random
from datetime import datetime, timedelta

fake = Faker()

# Bounding box (decimal degrees) of the campus area in which check-ins are placed.
# The longitude bounds are listed in descending order; random.uniform accepts its
# two limits in either order, so sampling still covers the whole interval.
latitude_range = (39.9472, 39.9566)
longitude_range = (-75.1911, -75.2033)

# Column layout shared by every generated record and by the CSV header.
fieldnames = ['UserID', 'Timestamp', 'Latitude', 'Longitude', 'Interest1', 'Interest2', 'Interest3']

# One dict per check-in; each user contributes several rows that share the same interests.
users = []

for user_id in range(1, 41):  # user IDs 1 through 40
    # Three distinct interests, fixed for all of this user's check-ins
    interests = random.sample(['Data Science', 'Art', 'Networking', 'Consultancy'], 3)

    checkin_count = random.randint(5, 6)  # either 5 or 6 check-ins per user
    for _ in range(checkin_count):
        # A moment somewhere in the 24 hours before this cell runs
        timestamp = fake.date_time_between(start_date='-1d', end_date='now')
        latitude = round(random.uniform(*latitude_range), 6)
        longitude = round(random.uniform(*longitude_range), 6)

        # Values are listed in the same order as `fieldnames`
        record_values = (
            user_id,
            timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            latitude,
            longitude,
            *interests,
        )
        user_data = dict(zip(fieldnames, record_values))
        users.append(user_data)

# Dump every record to disk, header row first
with open('fake_users.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(users)


In [5]:
import pandas as pd
from shapely.geometry import Point

def create_point_geometry(row):
    """Build a shapely ``Point`` from one row of the check-in table.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide ``"Longitude"`` and ``"Latitude"`` entries.

    Returns
    -------
    shapely.geometry.Point
        Point with x = longitude and y = latitude.

    Notes
    -----
    The Point object is built from the two numbers only; no CRS is attached
    here. The values are presumably WGS84 / EPSG:4326 degrees -- confirm
    before handing the geometry to a CRS-aware tool.
    """
    return Point(row["Longitude"], row["Latitude"])


# Load the check-ins produced by the data-generation step
df = pd.read_csv("fake_users.csv")

# Attach one Point per row (axis=1 hands each row to the helper)
df["geometry"] = df.apply(create_point_geometry, axis=1)

# The Latitude/Longitude columns are intentionally kept next to the new
# geometry column, so the exported file carries both representations.

# Export the augmented table without the pandas index column
df.to_csv("fake_users_geom.csv", index=False)


In [4]:
import pandas as pd
import json
from geopy.distance import geodesic
from datetime import datetime, timedelta

def find_matching_pairs(users, max_distance_miles=1, max_time_diff_minutes=60):
    """Pair up check-ins of different users that are close in space and time and share an interest.

    Parameters
    ----------
    users : pandas.DataFrame
        One row per check-in. Required columns: ``UserID``, ``Timestamp``
        (strings formatted ``'%Y-%m-%d %H:%M:%S'``), ``Latitude``, ``Longitude``,
        plus the interest columns, i.e. every column whose name starts with
        ``Interest``. The frame may carry any index.
    max_distance_miles : float, optional
        Largest geodesic distance, in miles, for two check-ins to match (default 1).
    max_time_diff_minutes : float, optional
        Largest absolute time gap, in minutes, for two check-ins to match (default 60).

    Returns
    -------
    list of dict
        One dict per matching pair of rows, in row order, with keys ``user1_id``,
        ``user2_id``, ``common_interest`` (the first of user 1's interests that
        user 2 also lists) and ``timestamp`` (the later of the two check-in
        times, formatted like the input).

    Raises
    ------
    ValueError
        If a ``Timestamp`` value does not match the expected format or a
        coordinate cannot be converted to ``float``.
    """
    # Only these columns take part in the interest comparison. Comparing whole
    # rows would let a shared UserID, timestamp or coordinate pass as a
    # "common interest".
    interest_columns = [col for col in users.columns if str(col).startswith('Interest')]

    # Parse each row once up front instead of once per candidate pair.
    checkins = []
    for _, row in users.iterrows():
        checkins.append({
            'user_id': row['UserID'],
            'location': (float(row['Latitude']), float(row['Longitude'])),
            'timestamp': datetime.strptime(row['Timestamp'], '%Y-%m-%d %H:%M:%S'),
            # Missing interests (NaN) are dropped so they can never "match"
            'interests': [row[col] for col in interest_columns if pd.notna(row[col])],
        })

    matching_pairs = []

    # Pair rows by position in the list, so the result does not depend on the
    # DataFrame's index labels.
    for position, first in enumerate(checkins):
        for second in checkins[position + 1:]:
            # Two check-ins of the same user are not a pair of users
            if first['user_id'] == second['user_id']:
                continue

            # Time gap in minutes
            time_diff = abs((second['timestamp'] - first['timestamp']).total_seconds() / 60)
            if time_diff > max_time_diff_minutes:
                continue

            # First of user 1's interests that user 2 shares, if any
            common_interest = next(
                (interest for interest in first['interests'] if interest in second['interests']),
                None,
            )
            if common_interest is None:
                continue

            # The geodesic computation is the costly step, so it runs last
            distance = geodesic(first['location'], second['location']).miles
            if distance > max_distance_miles:
                continue

            matching_pairs.append({
                'user1_id': first['user_id'],
                'user2_id': second['user_id'],
                'common_interest': common_interest,
                'timestamp': max(first['timestamp'], second['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
            })

    return matching_pairs

def main():
    """Run the matching pipeline end to end.

    Loads the check-in table from ``fake_users_geom.csv``, computes the
    matching pairs with ``find_matching_pairs``, writes them to
    ``matching_pairs.json`` (indented by 4 spaces) and to
    ``matching_pairs.csv`` (without the index column), then prints a
    confirmation message.
    """
    # Load the check-ins exported by the geometry step
    checkins = pd.read_csv('fake_users_geom.csv')

    # Pair up the check-ins
    pairs = find_matching_pairs(checkins)

    # JSON export
    with open('matching_pairs.json', 'w') as json_handle:
        json.dump(pairs, json_handle, indent=4)

    # CSV export of the same records via pandas
    pd.DataFrame(pairs).to_csv('matching_pairs.csv', index=False)

    print("Matching user pairs saved to JSON and CSV files.")


# Execute the pipeline when this code is run as the main program
if __name__ == "__main__":
    main()


Matching user pairs saved to JSON and CSV files.


In [6]:
import pandas as pd
import json
from geopy.distance import geodesic
from datetime import datetime, timedelta

def find_matching_pairs(users, max_distance_miles=1, max_time_diff_minutes=60):
    """Pair up check-ins of different users that are close in space and time and share an interest.

    Each reported pair also carries the midpoint of the two check-in locations.

    Parameters
    ----------
    users : pandas.DataFrame
        One row per check-in. Required columns: ``UserID``, ``Timestamp``
        (strings formatted ``'%Y-%m-%d %H:%M:%S'``), ``Latitude``, ``Longitude``,
        plus the interest columns, i.e. every column whose name starts with
        ``Interest``. The frame may carry any index.
    max_distance_miles : float, optional
        Largest geodesic distance, in miles, for two check-ins to match (default 1).
    max_time_diff_minutes : float, optional
        Largest absolute time gap, in minutes, for two check-ins to match (default 60).

    Returns
    -------
    list of dict
        One dict per matching pair of rows, in row order, with keys ``user1_id``,
        ``user2_id``, ``common_interest`` (the first of user 1's interests that
        user 2 also lists), ``timestamp`` (the later of the two check-in times,
        formatted like the input), and ``avg_lat`` / ``avg_lon`` (arithmetic
        mean of the two latitudes / longitudes).

    Raises
    ------
    ValueError
        If a ``Timestamp`` value does not match the expected format or a
        coordinate cannot be converted to ``float``.
    """
    # Only these columns take part in the interest comparison. Comparing whole
    # rows would let a shared UserID, timestamp or coordinate pass as a
    # "common interest".
    interest_columns = [col for col in users.columns if str(col).startswith('Interest')]

    # Parse each row once up front instead of once per candidate pair.
    checkins = []
    for _, row in users.iterrows():
        checkins.append({
            'user_id': row['UserID'],
            'lat': float(row['Latitude']),
            'lon': float(row['Longitude']),
            'timestamp': datetime.strptime(row['Timestamp'], '%Y-%m-%d %H:%M:%S'),
            # Missing interests (NaN) are dropped so they can never "match"
            'interests': [row[col] for col in interest_columns if pd.notna(row[col])],
        })

    matching_pairs = []

    # Pair rows by position in the list, so the result does not depend on the
    # DataFrame's index labels.
    for position, first in enumerate(checkins):
        for second in checkins[position + 1:]:
            # Two check-ins of the same user are not a pair of users
            if first['user_id'] == second['user_id']:
                continue

            # Time gap in minutes
            time_diff = abs((second['timestamp'] - first['timestamp']).total_seconds() / 60)
            if time_diff > max_time_diff_minutes:
                continue

            # First of user 1's interests that user 2 shares, if any
            common_interest = next(
                (interest for interest in first['interests'] if interest in second['interests']),
                None,
            )
            if common_interest is None:
                continue

            # The geodesic computation is the costly step, so it runs last
            distance = geodesic(
                (first['lat'], first['lon']),
                (second['lat'], second['lon']),
            ).miles
            if distance > max_distance_miles:
                continue

            matching_pairs.append({
                'user1_id': first['user_id'],
                'user2_id': second['user_id'],
                'common_interest': common_interest,
                'timestamp': max(first['timestamp'], second['timestamp']).strftime('%Y-%m-%d %H:%M:%S'),
                # Plain arithmetic midpoint of the two coordinates
                'avg_lat': (first['lat'] + second['lat']) / 2,
                'avg_lon': (first['lon'] + second['lon']) / 2
            })

    return matching_pairs

def main():
    """Run the matching pipeline end to end.

    Loads the check-in table from ``fake_users_geom.csv``, computes the
    matching pairs with ``find_matching_pairs``, writes them to
    ``matching_pairs.json`` (indented by 4 spaces) and to
    ``matching_pairs.csv`` (without the index column), then prints a
    confirmation message.
    """
    # Load the check-ins exported by the geometry step
    checkins = pd.read_csv('fake_users_geom.csv')

    # Pair up the check-ins
    pairs = find_matching_pairs(checkins)

    # JSON export
    with open('matching_pairs.json', 'w') as json_handle:
        json.dump(pairs, json_handle, indent=4)

    # CSV export of the same records via pandas
    pd.DataFrame(pairs).to_csv('matching_pairs.csv', index=False)

    print("Matching user pairs saved to JSON and CSV files.")


# Execute the pipeline when this code is run as the main program
if __name__ == "__main__":
    main()


Matching user pairs saved to JSON and CSV files.
