### 1. Car Matrix Generation

In [1]:
import pandas as pd

In [2]:
def generate_car_matrix(df):
    """Build an id_1 x id_2 matrix of `car` values with a zeroed diagonal.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'id_1', 'id_2' and 'car'. Each
        (id_1, id_2) pair may appear only once; `DataFrame.pivot` raises
        ValueError on duplicate pairs.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by id_1, columns by id_2. Pairs missing from `df`
        are filled with 0, and every cell whose row label equals its column
        label is set to 0.
    """
    car_matrix = df.pivot(index='id_1', columns='id_2', values='car').fillna(0)

    # Zero the self-pairs by *label*, not by position: when the set of id_1
    # values differs from the set of id_2 values, the positional diagonal
    # (iloc[i, i]) no longer lines up with the id_1 == id_2 cells.
    for label in car_matrix.index.intersection(car_matrix.columns):
        car_matrix.loc[label, label] = 0

    return car_matrix

# Load dataset-1.csv into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where the file exists at exactly this location.
file_path = 'C:\\Users\\HP\\Downloads\\dataset-1.csv'
data = pd.read_csv(file_path)

# Build the id_1 x id_2 car matrix and print it as plain text.
result = generate_car_matrix(data)
print(result) 

id_2    801    802    803    804    805    806    807    808    809    821  \
id_1                                                                         
801    0.00   2.80   6.00   7.70  11.70  13.40  16.90  19.60  21.00  23.52   
802    2.80   0.00   3.40   5.20   9.20  10.90  14.30  17.10  18.50  20.92   
803    6.00   3.40   0.00   2.00   6.00   7.70  11.10  13.90  15.30  17.72   
804    7.70   5.20   2.00   0.00   4.40   6.10   9.50  12.30  13.70  16.12   
805   11.70   9.20   6.00   4.40   0.00   2.00   5.40   8.20   9.60  12.02   
806   13.40  10.90   7.70   6.10   2.00   0.00   3.80   6.60   8.00  10.42   
807   16.90  14.30  11.10   9.50   5.40   3.80   0.00   2.90   4.30   6.82   
808   19.60  17.10  13.90  12.30   8.20   6.60   2.90   0.00   1.70   4.12   
809   21.00  18.50  15.30  13.70   9.60   8.00   4.30   1.70   0.00   2.92   
821   23.52  20.92  17.72  16.12  12.02  10.42   6.82   4.12   2.92   0.00   
822   24.67  22.07  18.87  17.27  13.17  11.57   7.97   5.27   4

### 2. Car Type Count Calculation

In [3]:
def get_type_count(df):
    """Label each row by its `car` value and count the rows per label.

    A categorical column 'car_type' is written into `df` itself (the caller's
    DataFrame is modified): values up to and including 15 are 'low', values
    above 15 up to and including 25 are 'medium', anything larger is 'high'.

    Returns a dict mapping each label to its row count, with keys in
    alphabetical order.
    """
    edges = [float('-inf'), 15, 25, float('inf')]
    names = ['low', 'medium', 'high']

    # pd.cut uses right-closed bins by default, so 15 is 'low' and 25 is 'medium'.
    df['car_type'] = pd.cut(df['car'], bins=edges, labels=names)

    counts = df['car_type'].value_counts().to_dict()

    # Rebuild the dict in alphabetical key order.
    return {label: counts[label] for label in sorted(counts)}

# Re-read dataset-1.csv so the function receives a fresh DataFrame
# (get_type_count adds a 'car_type' column to the frame it is given).
# NOTE(review): absolute, machine-specific Windows path.
file_path = 'C:\\Users\\HP\\Downloads\\dataset-1.csv'
data = pd.read_csv(file_path)

# Count the rows per car_type category and print the resulting dict.
result = get_type_count(data)
print(result)

{'high': 56, 'low': 196, 'medium': 89}


### 3. Bus Count Index Retrieval

In [5]:
def get_bus_indexes(df):
    """Return the index labels of rows whose `bus` value is unusually high.

    A row qualifies when its 'bus' value is strictly greater than twice the
    mean of the whole 'bus' column. The labels are returned as a list sorted
    in ascending order.
    """
    threshold = 2 * df['bus'].mean()
    exceeds = df['bus'] > threshold

    return sorted(df.loc[exceeds].index.tolist())

# Load dataset-1.csv into a DataFrame.
# NOTE(review): absolute, machine-specific Windows path.
file_path = 'C:\\Users\\HP\\Downloads\\dataset-1.csv'
data = pd.read_csv(file_path)

# Collect the row indices whose 'bus' value exceeds twice the column mean
# and print them.
result = get_bus_indexes(data)
print(result)

[2, 7, 12, 17, 25, 30, 54, 64, 70, 97, 144, 145, 149, 154, 160, 201, 206, 210, 215, 234, 235, 245, 250, 309, 314, 319, 322, 323, 334, 340]


### 4. Route Filtering

In [7]:
def filter_routes(df):
    """Return the routes whose mean `truck` value is strictly greater than 7.

    Rows are grouped by the 'route' column, the 'truck' column is averaged
    per group, and the qualifying route labels are returned as a list sorted
    in ascending order.
    """
    mean_truck_by_route = df.groupby('route')['truck'].mean()
    busy_routes = mean_truck_by_route.loc[mean_truck_by_route.gt(7)]

    return sorted(busy_routes.index.tolist())

# Load dataset-1.csv into a DataFrame.
# NOTE(review): absolute, machine-specific Windows path.
file_path = 'C:\\Users\\HP\\Downloads\\dataset-1.csv'
data = pd.read_csv(file_path)

# List the routes whose average 'truck' value is above 7 and print them.
result = filter_routes(data)
print(result)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


### 5. Matrix Value Modification

In [12]:
def multiply_matrix(input_df):
    """Scale every value of a numeric DataFrame and round to one decimal.

    Values strictly greater than 20 are multiplied by 0.75; all other values
    (including exactly 20) are multiplied by 1.25. Missing values stay
    missing.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Numeric matrix. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as `input_df`.
    """
    above_threshold = input_df > 20

    # Whole-frame arithmetic instead of an element-wise lambda: this avoids
    # DataFrame.applymap (deprecated in pandas 2.1) and produces the same
    # values. NaN compares False against 20, so it takes the 1.25 branch and
    # remains NaN.
    modified_df = (input_df * 0.75).where(above_threshold, input_df * 1.25)

    return modified_df.round(1)

# Small hand-written 3x3 example used to demonstrate multiply_matrix.
# NOTE(review): this overwrites `result` with a toy list of lists, so the
# function is NOT being applied to the car matrix produced in section 1.
result = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

# Convert the list of lists to a DataFrame
result_df = pd.DataFrame(result)

# Apply the scaling to the example frame and print the modified copy.
modified_result = multiply_matrix(result_df)
print(modified_result)

     0     1     2
0  1.2   2.5   3.8
1  5.0   6.2   7.5
2  8.8  10.0  11.2


### 6. Time Check

In [15]:
# Day offsets (counted from Monday) for the weekday spellings accepted in the
# startDay / endDay columns: full names and three-letter abbreviations.
_DAY_OFFSETS = {
    'monday': 0, 'mon': 0,
    'tuesday': 1, 'tue': 1,
    'wednesday': 2, 'wed': 2,
    'thursday': 3, 'thu': 3,
    'friday': 4, 'fri': 4,
    'saturday': 5, 'sat': 5,
    'sunday': 6, 'sun': 6,
}
_SECONDS_PER_DAY = 24 * 60 * 60
_SECONDS_PER_WEEK = 7 * _SECONDS_PER_DAY


def _week_seconds(day_names, clock_times):
    """Convert weekday names plus HH:MM:SS strings to seconds since Monday 00:00:00.

    Entries whose day name or clock time cannot be interpreted become NaN.
    """
    day_offset = day_names.astype(str).str.strip().str.lower().map(_DAY_OFFSETS)
    time_of_day = pd.to_timedelta(
        clock_times.astype(str).str.strip(), errors='coerce'
    ).dt.total_seconds()
    # Only clock times that fall inside a single day are meaningful here.
    time_of_day = time_of_day.where(time_of_day.between(0, _SECONDS_PER_DAY - 1))
    return day_offset * _SECONDS_PER_DAY + time_of_day


def _covers_full_week(starts, ends):
    """Return True when the union of inclusive [start, end] spans covers a whole week.

    `starts` and `ends` are seconds since Monday 00:00:00. A span whose end
    lies before its start is treated as wrapping past Sunday midnight.
    """
    pieces = []
    for start, end in zip(starts, ends):
        if end >= start:
            pieces.append((start, end))
        else:
            pieces.append((start, _SECONDS_PER_WEEK - 1))
            pieces.append((0, end))
    pieces.sort()

    next_uncovered = 0  # first second of the week not yet covered
    for start, end in pieces:
        if start > next_uncovered:
            return False  # gap before this span begins
        # Ends are inclusive, so a span ending at 23:59:59 joins up with one
        # starting at 00:00:00 on the following day.
        next_uncovered = max(next_uncovered, end + 1)
    return next_uncovered >= _SECONDS_PER_WEEK


def check_time_completeness(df):
    """Report, per (id, id_2) pair, whether its time spans cover a full week.

    Each row describes a span from (startDay, startTime) to (endDay, endTime),
    where the day columns hold weekday names and the time columns hold
    HH:MM:SS strings. A pair is complete when the union of its spans covers
    every second from Monday 00:00:00 through Sunday 23:59:59.

    Weekday names are mapped onto a fixed Monday-based week instead of being
    handed to a free-form date parser, so the result does not depend on how
    (or when) a bare weekday name happens to be resolved to a calendar date.
    Coverage is measured on the union of spans, so overlapping rows are not
    counted twice.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'id', 'id_2', 'startDay', 'startTime', 'endDay' and
        'endTime'. It is not modified.

    Returns
    -------
    pandas.Series
        Boolean Series indexed by a (id, id_2) MultiIndex; True means the
        pair's spans cover the whole week. Rows with an unparseable day or
        time are ignored, so a pair with no usable rows is False.
    """
    spans = pd.DataFrame({
        'id': df['id'],
        'id_2': df['id_2'],
        'start': _week_seconds(df['startDay'], df['startTime']),
        'end': _week_seconds(df['endDay'], df['endTime']),
    })

    keys = []
    flags = []
    for key, group in spans.groupby(['id', 'id_2']):
        usable = group.dropna(subset=['start', 'end'])
        keys.append(key)
        flags.append(_covers_full_week(usable['start'].tolist(), usable['end'].tolist()))

    if keys:
        pair_index = pd.MultiIndex.from_tuples(keys, names=['id', 'id_2'])
    else:
        # from_tuples cannot infer the number of levels from an empty list.
        pair_index = pd.MultiIndex.from_arrays([[], []], names=['id', 'id_2'])

    return pd.Series(flags, index=pair_index, dtype=bool)

# Load dataset-2.csv into a DataFrame (this replaces the dataset-1 frame
# previously held in `data`).
# NOTE(review): absolute, machine-specific Windows path.
file_path = 'C:\\Users\\HP\\Downloads\\dataset-2.csv'
data = pd.read_csv(file_path)

# Evaluate time coverage per (id, id_2) pair and print the boolean Series.
result = check_time_completeness(data)
print(result)

id       id_2    
1014000  -1          False
1014002  -1          False
1014003  -1          False
1030000  -1          False
          1030002    False
                     ...  
1330016   1330006    False
          1330008    False
          1330010    False
          1330012    False
          1330014    False
Length: 9254, dtype: bool
