In [2]:
import pandas as pd


def generate_car_matrix(df)->pd.DataFrame:

    if 'id_1' not in df.columns or 'id_2' not in df.columns or 'car' not in df.columns:
        raise ValueError("DataFrame must have 'id_1', 'id_2', and 'car' columns")

        # Create a matrix using pivot
    car_matrix = df.pivot(index='id_1', columns='id_2', values='car')

    # Fill NaN values with 0 and set diagonal values to 0
    car_matrix = car_matrix.fillna(0)
    car_matrix.values[[range(len(car_matrix))] * 2] = 0

    return car_matrix

    """
    Creates a DataFrame  for id combinations.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame: Matrix generated with 'car' values, 
                          where 'id_1' and 'id_2' are used as indices and columns respectively.
    """


df = pd.read_csv("C:/Users/pprasadd/MapUp-Data-Assessment-F/datasets/dataset-1.csv")

print(generate_car_matrix(df))

id_2  801  802  803  804  805  806  807  808  809  821  822  823  824  825  \
id_1                                                                         
801   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
802   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
803   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
804   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
805   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
806   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
807   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
808   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
809   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
821   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
822   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [3]:


def get_type_count(df)->dict:
    if 'car' not in df.columns:
        raise ValueError("DataFrame must have a 'car' column")

        # Create a new column 'car_type' based on specified conditions
    df['car_type'] = pd.cut(df['car'], bins=[-float('inf'), 15, 25, float('inf')],
                            labels=['low', 'medium', 'high'], right=False)

    # Count occurrences of each car_type category
    type_counts = df['car_type'].value_counts().to_dict()

    # Sort the dictionary alphabetically based on keys
    type_counts = dict(sorted(type_counts.items()))

    return type_counts


df = pd.read_csv("C:/Users/pprasadd/MapUp-Data-Assessment-F/datasets/dataset-1.csv")

print(get_type_count(df))

{'high': 56, 'low': 196, 'medium': 89}


In [4]:
def get_bus_indexes(df)->list:
    # Check if the required column is present
    if 'bus' not in df.columns:
        raise ValueError("DataFrame must have a 'bus' column")

    # Calculate the mean value of the 'bus' column
    bus_mean = df['bus'].mean()

    # Identify indices where the 'bus' values are greater than twice the mean
    bus_indexes = df[df['bus'] > 2 * bus_mean].index.tolist()

    # Sort the indices in ascending order
    bus_indexes.sort()

    return bus_indexes


df = pd.read_csv("C:/Users/pprasadd/MapUp-Data-Assessment-F/datasets/dataset-1.csv")

print(get_bus_indexes(df))

[2, 7, 12, 17, 25, 30, 54, 64, 70, 97, 144, 145, 149, 154, 160, 201, 206, 210, 215, 234, 235, 245, 250, 309, 314, 319, 322, 323, 334, 340]


In [5]:
def filter_routes(df)->list:
    # Check if the required columns are present
    if 'route' not in df.columns or 'truck' not in df.columns:
        raise ValueError("DataFrame must have 'route' and 'truck' columns")

    # Group by 'route' and calculate the average of 'truck' for each route
    route_avg_truck = df.groupby('route')['truck'].mean()

    # Filter routes where the average of 'truck' is greater than 7
    filtered_routes = route_avg_truck[route_avg_truck > 7].index.tolist()

    # Sort the list of routes
    filtered_routes.sort()

    return filtered_routes


df = pd.read_csv("C:/Users/pprasadd/MapUp-Data-Assessment-F/datasets/dataset-1.csv")

print(filter_routes(df))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [9]:
def multiply_matrix(matrix)->pd.DataFrame:
    if not isinstance(matrix, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame")

        # Apply the specified logic to modify values
    modified_matrix = matrix.applymap(lambda x: x * 0.75 if x > 20 else x * 1.25)

    # Round values to 1 decimal place
    modified_matrix = modified_matrix.round(1)

    return modified_matrix

df = pd.read_csv("C:/Users/pprasadd/MapUp-Data-Assessment-F/datasets/dataset-1.csv")

matrix = generate_car_matrix(df)
modified_matrix = multiply_matrix(matrix)
print(modified_matrix)

id_2  801  802  803  804  805  806  807  808  809  821  822  823  824  825  \
id_1                                                                         
801   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
802   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
803   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
804   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
805   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
806   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
807   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
808   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
809   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
821   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
822   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0