In [11]:
import pandas as pd
from skmob import TrajDataFrame

# Load the cleaned dataset
DATA_PATH = "/Users/srimukund/Desktop/transportation/cleaned_trajectories.csv"
data = pd.read_csv(DATA_PATH)

# Ensure proper data types
data['utc_date'] = pd.to_datetime(data['utc_date'])

# Print the columns of the original DataFrame
print("Original DataFrame columns:")
print(data.columns)

# Rename columns for compatibility with TrajDataFrame
data.rename(columns={
    'caid': 'user_id',
    'latitude': 'lat',
    'longitude': 'lng',
    'state_pois': 'state'
}, inplace=True)

# Confirm column renaming
print("Columns after renaming:")
print(data.columns)

# Print the first few rows to inspect the data
print("First few rows of the DataFrame after renaming:")
print(data.head())

# Sort data by user and timestamp
data = data.sort_values(by=['user_id', 'utc_date'])

# Print the first few rows after sorting
print("First few rows of the DataFrame after sorting:")
print(data.head())

# Select only the necessary columns for TrajDataFrame
required_columns = ['lat', 'lng', 'utc_date', 'user_id']
data_for_traj = data[required_columns]

# Print the first few rows of the data to be converted to TrajDataFrame
print("Data to be converted to TrajDataFrame:")
print(data_for_traj.head())

# Explicitly set column names
data_for_traj.columns = ['lat', 'lng', 'datetime', 'user_id']

# Print the columns of the DataFrame to be converted to TrajDataFrame
print("Columns of DataFrame to be converted to TrajDataFrame:")
print(data_for_traj.columns)

# Convert to TrajDataFrame
try:
    traj = TrajDataFrame(data_for_traj, latitude='lat', longitude='lng', datetime='datetime', user_id='user_id')
    print("TrajDataFrame successfully created!")
    print(traj.head())

    # Explicitly rename 'uid' to 'user_id' if needed
    traj.rename(columns={'uid': 'user_id'}, inplace=True)
except Exception as e:
    print(f"An error occurred while converting to TrajDataFrame: {e}")

# Validate TrajDataFrame
def validate_trajdataframe(traj_df):
    required_columns = ['lat', 'lng', 'datetime', 'user_id']
    for col in required_columns:
        if col not in traj_df.columns:
            print(f"Missing required column: {col}")
            return False
    print("Validation successful: TrajDataFrame contains all required columns.")
    return True

# Run validation
validate_trajdataframe(traj)



Original DataFrame columns:
Index(['utc_date', 'placekey', 'location_name', 'naics_code', 'street_address',
       'city', 'state', 'census_block_group', 'local_timestamp', 'caid',
       'id_type', 'top_category', 'sub_category', 'brands', 'zip_code',
       'minimum_dwell', 'geohash_5', 'location_name_pois', 'naics_code_pois',
       'street_address_pois', 'city_pois', 'state_pois',
       'census_block_group_pois', 'latitude', 'longitude', 'zip_code_pois',
       'location_name_home', 'home_flag'],
      dtype='object')
Columns after renaming:
Index(['utc_date', 'placekey', 'location_name', 'naics_code', 'street_address',
       'city', 'state', 'census_block_group', 'local_timestamp', 'user_id',
       'id_type', 'top_category', 'sub_category', 'brands', 'zip_code',
       'minimum_dwell', 'geohash_5', 'location_name_pois', 'naics_code_pois',
       'street_address_pois', 'city_pois', 'state', 'census_block_group_pois',
       'lat', 'lng', 'zip_code_pois', 'location_name_home', 'h

True

In [12]:
pip install streamlit folium geopy pandas skmob


Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
[31mERROR: Could not find a version that satisfies the requirement skmob (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for skmob[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.
