# In [5]:
import pandas as pd
def generate_car_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build an id_1 x id_2 matrix of 'car' values with a zeroed diagonal.

    Args:
        df (pandas.DataFrame): Frame with 'id_1', 'id_2' and 'car' columns.

    Returns:
        pandas.DataFrame: Matrix generated with 'car' values,
                          where 'id_1' and 'id_2' are used as indices and columns respectively.
                          Missing combinations and the diagonal are 0.

    Raises:
        ValueError: If an ('id_1', 'id_2') pair occurs more than once
            (raised by ``DataFrame.pivot``).
    """
    # Use the frame passed by the caller instead of re-reading a fixed CSV.
    car_matrix = df.pivot(index='id_1', columns='id_2', values='car').fillna(0)

    # Only labels present on BOTH axes have a diagonal cell; assigning to a
    # label missing from the columns would silently append a new column.
    shared_labels = car_matrix.index.intersection(car_matrix.columns)
    for label in shared_labels:
        car_matrix.loc[label, label] = 0

    return car_matrix

# In [6]:
def get_type_count(df: pd.DataFrame) -> dict:
    """
    Categorizes 'car' values into types and returns a dictionary of counts.

    Categories are: 'low' (car <= 15), 'medium' (15 < car <= 25) and
    'high' (car > 25). Rows whose 'car' value is missing match no category
    and are not counted.

    Args:
        df (pandas.DataFrame): Frame with a numeric 'car' column. It is not
            modified.

    Returns:
        dict: A dictionary with car types as keys and their counts as values,
              ordered alphabetically by key. Types with no rows are omitted.
    """
    car = df['car']
    # Boolean masks are counted directly so no helper column is written to
    # the caller's frame and no NumPy import is required.
    masks = {
        'low': car <= 15,
        'medium': (car > 15) & (car <= 25),
        'high': car > 25,
    }
    type_counts = {label: int(mask.sum()) for label, mask in masks.items()}

    # Keep only the categories that actually occur, sorted by name.
    return {
        label: count
        for label, count in sorted(type_counts.items())
        if count
    }

# In [7]:
def get_bus_indexes(df: pd.DataFrame) -> list:
    """
    Returns the indexes where the 'bus' values are greater than twice the mean.

    Args:
        df (pandas.DataFrame): Frame with a numeric 'bus' column.

    Returns:
        list: Index labels, in ascending order, of the rows whose 'bus' value
              exceeds twice the column mean.
    """
    bus = df['bus']
    # The threshold is computed from the frame passed in, not from a fixed CSV.
    threshold = 2 * bus.mean()

    return sorted(bus[bus > threshold].index.tolist())

# In [8]:
def filter_routes(df: pd.DataFrame) -> list:
    """
    Filters and returns routes with average 'truck' values greater than 7.

    Args:
        df (pandas.DataFrame): Frame with 'route' and numeric 'truck' columns.

    Returns:
        list: Sorted list of route names with average 'truck' values
              strictly greater than 7.
    """
    # Per-route mean of 'truck', computed on the frame supplied by the caller.
    avg_truck_by_route = df.groupby('route')['truck'].mean()

    selected_routes = avg_truck_by_route[avg_truck_by_route > 7].index.tolist()

    return sorted(selected_routes)

# In [9]:
def multiply_matrix(matrix: pd.DataFrame) -> pd.DataFrame:
    """
    Multiplies matrix values with custom conditions.

    Values greater than 20 are multiplied by 0.75; all other values are
    multiplied by 1.25. The result is rounded to one decimal place.

    Args:
        matrix (pandas.DataFrame): Numeric matrix. It is not modified.

    Returns:
        pandas.DataFrame: New matrix with values multiplied based on the
                          conditions above.
    """
    # Start from the "<= 20" scaling, then overwrite the cells that were
    # above 20 in the ORIGINAL matrix. This is vectorised and avoids the
    # element-wise ``applymap`` call.
    modified_matrix = (matrix * 1.25).mask(matrix > 20, matrix * 0.75)

    return modified_matrix.round(1)

# In [10]:
def time_check(df: pd.DataFrame) -> pd.Series:
    """
    Check, per (`id`, `id_2`) pair, whether its timestamps cover a full
    24-hour and 7 days period.

    A pair passes when both of the following hold:
      * the span from its earliest 'start_datetime' to its latest
        'end_datetime' is at least one day, and
      * its 'start_datetime' values fall on all seven days of the week.

    Args:
        df (pandas.DataFrame): Frame with 'id', 'id_2', 'start_datetime' and
            'end_datetime' columns. The two datetime columns are passed
            through ``pd.to_datetime``, so datetime-like strings are accepted.
            NOTE(review): assumes the caller has already combined any separate
            day/time columns into these two columns -- confirm against the
            dataset loader.

    Returns:
        pd.Series: Boolean series indexed by (`id`, `id_2`); True when the
            pair satisfies both checks above, False otherwise.
    """
    starts = pd.to_datetime(df['start_datetime'])
    ends = pd.to_datetime(df['end_datetime'])
    pair_keys = [df['id'], df['id_2']]

    # Overall time span covered by each pair.
    covered_span = ends.groupby(pair_keys).max() - starts.groupby(pair_keys).min()
    full_24_hours = covered_span >= pd.Timedelta(days=1)

    # Number of distinct weekdays on which each pair has a start timestamp.
    distinct_weekdays = starts.dt.dayofweek.groupby(pair_keys).nunique()
    span_all_days = distinct_weekdays == 7

    return full_24_hours & span_all_days