In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import random

In [15]:
# Location of the full trip-level dataset, relative to the notebook's working directory.
file_path = "./alldata.csv"

# Read the whole CSV into memory; later cells use this frame as `data`.
data = pd.read_csv(filepath_or_buffer=file_path)

In [12]:
# Tract IDs (kept as strings) that make up the downtown area.
downtown_areas = [
    '17031839000', '17031080202', '17031833000', '17031833100', '17031839100',
    '17031081201', '17031080201', '17031081202', '17031842200', '17031838300',
    '17031081401', '17031081403', '17031081300', '17031081100', '17031080300',
    '17031080100', '17031080400', '17031081000', '17031320400', '17031320600',
    '17031081402', '17031320100', '17031081900', '17031081500', '17031081800',
    '17031081600', '17031081700', '17031280100', '17031281900'
]

# Integer form of the tract IDs, so they compare equal to the numeric
# origin/destination columns below.
downtown_areas_int = [int(tract) for tract in downtown_areas]

# Cast origin/destination to an explicit 64-bit integer dtype.
# Values such as 17031839000 do not fit in 32 bits, and a bare `int` resolves
# to a 32-bit dtype on some platforms (e.g. Windows with NumPy < 2), so the
# width is pinned here instead of relying on the platform default.
data['origin'] = data['origin'].astype('int64')
data['destination'] = data['destination'].astype('int64')

# Boolean masks: True where the trip starts / ends in a downtown tract.
downtown_origin = data['origin'].isin(downtown_areas_int)
downtown_destination = data['destination'].isin(downtown_areas_int)

# Count trips where both ends are downtown. Summing the combined mask gives
# the row count directly, without materialising a filtered copy of `data`.
downtown_to_downtown_count = int((downtown_origin & downtown_destination).sum())

print(f"Number of downtown to downtown travels: {downtown_to_downtown_count}")

Number of downtown to downtown travels: 841


In [21]:
# Keep only the rows whose origin AND destination are both downtown tracts.
# The masks are the boolean Series built from `data` in another cell.
downtown_to_downtown = data[downtown_origin & downtown_destination]

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print("\nDataset Info:")
downtown_to_downtown.info()

# Display summary statistics for the numeric columns.
print("\nSummary Statistics:")
print(downtown_to_downtown.describe())


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
Index: 841 entries, 6650 to 39639
Data columns (total 72 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             841 non-null    int64  
 1   destination            841 non-null    int64  
 2   origin                 841 non-null    int64  
 3   total_number_trips     841 non-null    int64  
 4   fare_sd                841 non-null    float64
 5   fare_median            841 non-null    float64
 6   miles_sd               841 non-null    float64
 7   miles_median           841 non-null    float64
 8   seconds_sd             841 non-null    float64
 9   seconds_median         841 non-null    float64
 10  airport_indicator      841 non-null    int64  
 11  downtown_indicator     841 non-null    int64  
 12  pcttransit_Des         841 non-null    float64
 13  pctmidinc_Des          841 non-null    float64
 14  pctmale_Des            841 non-null    floa

In [19]:
# Boolean masks for downtown origins and destinations. They are recomputed
# here so this cell does not depend on masks left over from another cell.
downtown_origin = data['origin'].isin(downtown_areas_int)
downtown_destination = data['destination'].isin(downtown_areas_int)

# New indicator column 'downtown_downtown': 1 when both ends of the trip are
# downtown, 0 otherwise. Note that this adds a column to `data` in place.
data['downtown_downtown'] = (downtown_origin & downtown_destination).astype(int)

# Verify the number of downtown-to-downtown travels (should be 841)
print(f"Number of downtown to downtown travels: {data['downtown_downtown'].sum()}")

# Export of the updated data is intentionally left disabled. It is kept as real
# comments rather than a bare triple-quoted string, which would be evaluated as
# an expression. Uncomment to (re)write the CSV:
# output_file = "alldata_downtownTodowntown.csv"
# data.to_csv(output_file, index=False)

# Display basic information about the updated dataset (w/ downtown_downtown).
# DataFrame.info() prints its own report and returns None, so it is not wrapped
# in print() (that would emit an extra "None" line).
print("\nUpdated Dataset Info:")
data.info()

Number of downtown to downtown travels: 841

Updated Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67498 entries, 0 to 67497
Data columns (total 72 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             67498 non-null  int64  
 1   destination            67498 non-null  int64  
 2   origin                 67498 non-null  int64  
 3   total_number_trips     67498 non-null  int64  
 4   fare_sd                67498 non-null  float64
 5   fare_median            67498 non-null  float64
 6   miles_sd               67498 non-null  float64
 7   miles_median           67498 non-null  float64
 8   seconds_sd             67498 non-null  float64
 9   seconds_median         67498 non-null  float64
 10  airport_indicator      67498 non-null  int64  
 11  downtown_indicator     67498 non-null  int64  
 12  pcttransit_Des         67498 non-null  float64
 13  pctmidinc_Des          67498 non-null  

In [17]:
# CSV that carries the extra `downtown_downtown` indicator column.
downtown_file_path = "./alldata_downtownTodowntown.csv"

# Load it into its own frame, separate from `data`.
data1 = pd.read_csv(filepath_or_buffer=downtown_file_path)

# Preview the first five rows (trailing expression -> rendered as the cell output).
data1.head(5)

Unnamed: 0.1,Unnamed: 0,destination,origin,total_number_trips,fare_sd,fare_median,miles_sd,miles_median,seconds_sd,seconds_median,...,PctWacBachelor_Des,PctWacBachelor_Ori,EmpDen_Des,EmpDen_Ori,EmpRetailDen_Des,EmpRetailDen_Ori,Commuters_HW,Commuters_WH,airport_trip,downtown_downtown
0,0,17031010100,17031010100,286,2.777866,5.0,1.147656,0.907605,388.9408,356.0,...,0.242925,0.242925,2876.438682,2876.438682,183.169444,183.169444,30,30,0,0
1,1,17031010201,17031010100,276,1.362977,5.0,0.302798,0.950614,111.314457,269.5,...,0.221649,0.242925,994.13537,2876.438682,51.244091,183.169444,3,8,0,0
2,2,17031010202,17031010100,251,1.472456,2.5,0.362001,0.684815,122.863992,234.0,...,0.20801,0.242925,5688.892004,2876.438682,1065.748502,183.169444,13,8,0,0
3,3,17031010300,17031010100,244,1.467996,2.5,0.277961,0.641315,93.832529,210.0,...,0.214729,0.242925,6172.36086,2876.438682,54.768064,183.169444,24,14,0,0
4,4,17031010400,17031010100,358,1.461406,5.0,0.500151,1.34418,168.688443,326.5,...,0.163022,0.242925,1129.399629,2876.438682,152.682256,183.169444,8,3,0,0


In [18]:
# The indicator holds 0/1 values, so its sum is the number of flagged rows.
downtown_flag = data1['downtown_downtown']
downtown_downtown_count = downtown_flag.sum()

print(f"Number of downtown to downtown travels: {downtown_downtown_count}")

# Frequency of each indicator value (0 vs 1).
value_counts = downtown_flag.value_counts()
print("\nDistribution of downtown_downtown values:", value_counts, sep="\n")

# Share of all rows that are downtown-to-downtown, expressed in percent.
total_travels = data1.shape[0]
percentage = (downtown_downtown_count / total_travels) * 100

print(f"\nPercentage of downtown to downtown travels: {percentage:.2f}%")

Number of downtown to downtown travels: 841

Distribution of downtown_downtown values:
downtown_downtown
0    66657
1      841
Name: count, dtype: int64

Percentage of downtown to downtown travels: 1.25%
