In [11]:
import pandas as pd

def get_distinct_values(df, column_name):
    """
    Returns a DataFrame with distinct values of the specified column.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    column_name (str): The name of the column to extract distinct values from.

    Returns:
    pd.DataFrame: A DataFrame with one column containing distinct values.
    """
    distinct_values = df[[column_name]].drop_duplicates().reset_index(drop=True)
    return distinct_values.rename(columns={column_name: column_name})

def replace_value_in_column(df, column_name, old_value, new_value):
    """
    Replaces occurrences of old_value with new_value in the specified column of the DataFrame.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    column_name (str): The name of the column where the replacement will occur.
    old_value (str): The value to be replaced.
    new_value (str): The value to replace with.

    Returns:
    pd.DataFrame: The DataFrame with the updated column.
    """
    df[column_name] = df[column_name].replace(old_value, new_value)
    return df

def calculate_discounted_price(df):
    """
    Calculates the discounted price for each product in the DataFrame.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing a 'ListPrice' column.

    Returns:
    pd.DataFrame: The DataFrame with an additional 'DiscountedPrice' column.
    """
    df['DiscountedPrice'] = df['ListPrice'] * 0.9
    return df

def calculate_average_price_by_category(df):
    """
    Calculates the average price grouped by the 'Category' column.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing 'Category' and 'ListPrice' columns.

    Returns:
    pd.DataFrame: A new DataFrame with 'Category' and the average 'ListPrice'.
    """
    average_price_df = df.groupby('Category', as_index=False)['ListPrice'].mean()
    average_price_df.rename(columns={'ListPrice': 'AveragePrice'}, inplace=True)
    average_price_df = average_price_df.sort_values(by='AveragePrice', ascending=False)
    return average_price_df

In [12]:
import pandas as pd

# Replace 'file_path.csv' with the path to your CSV file
df = pd.read_csv('products.csv')


df = replace_value_in_column(df, 'Category', 'MountainBikes', 'Mountain Bikes')
df = replace_value_in_column(df, 'Category', 'RoadBikes', 'Road Bikes')
df = replace_value_in_column(df, 'Category', 'Roadikes', 'Road Bikes')

distinct = get_distinct_values(df, 'Category')

added_new_price = calculate_discounted_price(df)

avg_prices = calculate_average_price_by_category(df)

print(avg_prices)

# Display the first few rows of the DataFrame
# print(df.head())

             Category  AveragePrice
21     Mountain Bikes   1683.365000
27         Road Bikes   1597.450000
34      Touring Bikes   1425.248182
23     MountainFrames   1364.500000
28        Road Frames    780.043636
22    Mountain Frames    652.837037
35     Touring Frames    631.415556
9           Cranksets    278.990000
37             Wheels    220.929286
12              Forks    184.400000
2         Bike Stands    159.000000
24           Panniers    125.000000
1          Bike Racks    120.000000
5              Brakes    106.500000
10        Derailleurs    106.475000
4     Bottom Brackets     92.240000
0          Bib-Shorts     89.990000
15           Headsets     87.073333
32             Tights     74.990000
14         Handlebars     73.890000
30             Shorts     64.275714
25             Pedals     64.018571
36              Vests     63.500000
17    Hydration Packs     54.990000
18            Jerseys     51.990000
29            Saddles     39.633333
16            Helmets     34