**For some unseen locations expected counts are generated based on vehicles entering and leaving another location.** \
These generated dataset is used to train models for predicting turning movement counts for some of unseen locations.

### Importing dependencies

In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np

## Buddha_Vihara_Temple

Expected counts are generated for this location based on vehicles entering and leaving camera id "Stn_HD_1"

In [20]:
def generate_for_datetime(df_path: str) -> pd.DataFrame:
    """
    Generate a DataFrame containing counts for a specific datetime.

    Parameters:
    df_path (str): The path to the CSV file containing the mobility data.

    Returns:
    pd.DataFrame: A DataFrame containing the mobility counts for the specified datet time.
    """
    
    df = pd.read_csv(df_path)
    # Handling some edge cases
    if df.empty:
        return None
    try:
        df["count"] = df["count"].astype(int)
    except KeyError:
        df.columns =["time_stamp","zone_in","zone_out","class","count"]
    time_stamp = df["time_stamp"].iloc[0]
    df_out = df.loc[(df["zone_out"] == 4) | (df["zone_out"] == 5)]
    df_in = df.loc[(df["zone_in"] == 4) | (df["zone_in"] == 5)]
    
    dataset_rows = []
    for cls, count in df_out.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,0,1,cls,count))
    for cls, count in df_in.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,1,0,cls,count))

    unseen_loc_df = pd.DataFrame(dataset_rows, columns=["time_stamp", "zone_in", "zone_out", "class", "count"])
    return unseen_loc_df

In [21]:
def generate_for_unseen(dir_path: str, dest_loc: str, src_loc: str) -> None:
    """
    Generate expected counts for an unseen location based on the counts of vehicles entering and leaving a camera id.

    Parameters:
    dir_path (str): The path to the directory containing the mobility data.
    dest_loc (str): The name of the unseen location.
    src_loc (str): The name of the source location.

    Returns:
    None
    """
    cnt = 0
    for i, subdir in enumerate(sorted(os.listdir(dir_path))):
        print("In",subdir)
        cnt = 0
        for j, file in enumerate(sorted(os.listdir(dir_path / subdir))):
            if file.endswith(".csv") and file.count(src_loc) == 1:
                unseen_loc_df = generate_for_datetime(dir_path / subdir / file)
                unseen_loc_df.to_csv(dir_path / subdir / file.replace(src_loc, dest_loc), index=False)
                cnt += 1
        print(f"{cnt+1} files processed")
        

In [22]:
dir_path = Path(r"D:\cv_iitr\bangalore_mobility_hackthon\zones\data\bangaluru_mobility_counts_v2")

In [23]:
dst_loc = "Buddha_Vihara_Temple"
src_loc = "Stn_HD_1"
unseen_loc_df = generate_for_unseen(dir_path, dst_loc, src_loc)

In 2024-05-14
12 files processed
In 2024-05-15
13 files processed
In 2024-05-16
13 files processed
In 2024-05-17
13 files processed
In 2024-05-18
13 files processed
In 2024-05-19
13 files processed
In 2024-05-20
13 files processed
In 2024-05-21
13 files processed
In 2024-05-22
13 files processed
In 2024-05-23
13 files processed
In 2024-05-24
13 files processed
In 2024-05-25
13 files processed
In 2024-05-26
13 files processed
In 2024-05-27
12 files processed
In 2024-05-28
13 files processed
In 2024-05-29
13 files processed
In 2024-05-30
13 files processed
In 2024-05-31
13 files processed
In 2024-06-01
13 files processed


## Sundaranagar_Entrance
Expected counts are generated for this location based on vehicles entering and leaving camera id "Mattikere_JN_FIX_2"

In [24]:
def generate_for_datetime(df_path: str) -> pd.DataFrame:
    """
    Generate a DataFrame containing counts for a specific datetime.

    Parameters:
    df_path (str): The path to the CSV file containing the mobility data.

    Returns:
    pd.DataFrame: A DataFrame containing the mobility counts for the specified datet time.
    """
    df = pd.read_csv(df_path)
    
    # Handling some edge cases
    if df.empty:
        return None
    try:
        df["count"] = df["count"].astype(int)
    except KeyError:
        df.columns =["time_stamp","zone_in","zone_out","class","count"]
    time_stamp = df["time_stamp"].iloc[0]
    df_out = df.loc[(df["zone_out"] == 0) | (df["zone_out"] == 1)]
    df_in = df.loc[(df["zone_in"] == 0) | (df["zone_in"] == 1)]
    
    dataset_rows = []
    for cls, count in df_out.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,0,1,cls,count))
    for cls, count in df_in.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,1,0,cls,count))

    unseen_loc_df = pd.DataFrame(dataset_rows, columns=["time_stamp", "zone_in", "zone_out", "class", "count"])
    return unseen_loc_df

In [25]:
def generate_for_unseen(dir_path: str, dest_loc: str, src_loc: str) -> None:
    """
    Generate expected counts for an unseen location based on the counts of vehicles entering and leaving a camera id.

    Parameters:
    dir_path (str): The path to the directory containing the mobility data.
    dest_loc (str): The name of the unseen location.
    src_loc (str): The name of the source location.

    Returns:
    None
    """
    cnt = 0
    for i, subdir in enumerate(sorted(os.listdir(dir_path))):
        print("In",subdir)
        cnt = 0
        for j, file in enumerate(sorted(os.listdir(dir_path / subdir))):
            if file.endswith(".csv") and file.count(src_loc) == 1:
                unseen_loc_df = generate_for_datetime(dir_path / subdir / file)
                if unseen_loc_df is None:
                    continue
                unseen_loc_df.to_csv(dir_path / subdir / file.replace(src_loc, dest_loc), index=False)
                cnt += 1
        print(f"{cnt} files processed")
    
                

In [26]:
dir_path = Path(r"D:\cv_iitr\bangalore_mobility_hackthon\zones\data\bangaluru_mobility_counts_v2")

In [27]:
src_loc = "Mattikere_JN_FIX_2"
dest_loc = "Sundaranagar_Entrance"
unseen_loc_df = generate_for_unseen(dir_path, dest_loc, src_loc)

In 2024-05-14
11 files processed
In 2024-05-15
3 files processed
In 2024-05-16
12 files processed
In 2024-05-17
12 files processed
In 2024-05-18
12 files processed
In 2024-05-19
8 files processed
In 2024-05-20
13 files processed
In 2024-05-21
10 files processed
In 2024-05-22
12 files processed
In 2024-05-23
12 files processed
In 2024-05-24
12 files processed
In 2024-05-25
12 files processed
In 2024-05-26
12 files processed
In 2024-05-27
12 files processed
In 2024-05-28
12 files processed
In 2024-05-29
3 files processed
In 2024-05-30
4 files processed
In 2024-05-31
8 files processed
In 2024-06-01
2 files processed


## 80ft_Road
Expected counts are generated for this location based on vehicles entering and leaving camera id "MS_Ramaiah_JN_FIX_1" and "MS_Ramaiah_JN_FIX_2" respectively.

In [28]:
def generate_for_datetime(dir_path, subdir, src_loc_1_file, src_loc_2_file):
    """
    Generate a DataFrame containing counts for a specific datetime.

    Parameters:
    df_path (str): The path to the CSV file containing the mobility data.

    Returns:
    pd.DataFrame: A DataFrame containing the mobility counts for the specified datet time.
    """
    df_loc_1 = pd.read_csv(dir_path / subdir / src_loc_1_file)
    df_loc_2 = pd.read_csv(dir_path / subdir / src_loc_2_file)
    
    # Handling some edge cases
    if df_loc_1.empty or df_loc_2.empty:
        return None
    try:
        df_loc_1["count"] = df_loc_1["count"].astype(int)
    except KeyError:
        df_loc_1.columns =["time_stamp","zone_in","zone_out","class","count"]
    try:
        df_loc_2["count"] = df_loc_2["count"].astype(int)
    except KeyError:
        df_loc_2.columns =["time_stamp","zone_in","zone_out","class","count"]
        
    time_stamp = df_loc_1["time_stamp"].iloc[0]
    
    df_out = df_loc_2.loc[(df_loc_2["zone_out"] == 6)]
    df_in = df_loc_1.loc[(df_loc_1["zone_in"] == 0)]
    
    dataset_rows = []
    
    for cls, count in df_out.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,0,1,cls,count))
    
    for cls, count in df_in.groupby(by=["class"], as_index=False)["count"].sum().values:
        dataset_rows.append((time_stamp,1,0,cls,count))

    unseen_loc_df = pd.DataFrame(dataset_rows, columns=["time_stamp", "zone_in", "zone_out", "class", "count"])
    return unseen_loc_df
        

     

In [29]:
def generate_for_unseen(dir_path, dest_loc, src_loc_1, src_loc_2):
    """
    Generate expected counts for an unseen location based on the counts of vehicles entering and leaving two camera ids.

    Parameters:
    dir_path (str): The path to the directory containing the mobility data.
    dest_loc (str): The name of the unseen location.
    src_loc (str): The name of the source location.

    Returns:
    None
    """
    for i, subdir in enumerate(sorted(os.listdir(dir_path))):
        print("In",subdir)
        src_files_loc_1 = dict()
        src_files_loc_2 = dict()
        cnt = 0
        for j, file in enumerate(sorted(os.listdir(dir_path / subdir))):
            if file.endswith(".csv"):
                if file.count(src_loc_1) == 1:
                    time = file.split("time")[-1]
                    src_files_loc_1[time] = file
                elif file.count(src_loc_2) == 1:
                    time = file.split("time")[-1]
                    src_files_loc_2[time] = file
        
        # Make sure both the camera ids have the extracted counts for the same time
        for time in src_files_loc_1.keys():
            if time in src_files_loc_2:
                unseen_loc_df = generate_for_datetime(dir_path, subdir, src_files_loc_1[time], src_files_loc_2[time])
                
                if unseen_loc_df is None:
                    continue
                unseen_loc_df.to_csv(dir_path / subdir / src_files_loc_1[time].replace(src_loc_1, dest_loc), index=False)
                cnt += 1
        print(f"{cnt} files processed")
    
                

In [30]:
dir_path = Path(r"D:\cv_iitr\bangalore_mobility_hackthon\zones\data\bangaluru_mobility_counts_v2")

In [31]:
src_loc_1 = "MS_Ramaiah_JN_FIX_1"
src_loc_2 = "MS_Ramaiah_JN_FIX_2"
dest_loc = "80ft_Road"
unseen_loc_df = generate_for_unseen(dir_path, dest_loc, src_loc_1, src_loc_2)

In 2024-05-14
8 files processed
In 2024-05-15
12 files processed
In 2024-05-16
12 files processed
In 2024-05-17
12 files processed
In 2024-05-18
12 files processed
In 2024-05-19
12 files processed
In 2024-05-20
3 files processed
In 2024-05-21
0 files processed
In 2024-05-22
11 files processed
In 2024-05-23
12 files processed
In 2024-05-24
12 files processed
In 2024-05-25
0 files processed
In 2024-05-26
0 files processed
In 2024-05-27
12 files processed
In 2024-05-28
12 files processed
In 2024-05-29
13 files processed
In 2024-05-30
0 files processed
In 2024-05-31
12 files processed
In 2024-06-01
13 files processed
