In [None]:
import pandas as pd

# Exploratory pass over the workout dataset: load the CSV, summarise it,
# fill missing values, aggregate and filter, then drop "Maxpulse" and cast
# "Calories" to int.

# 1. Read the provided CSV file 'data.csv'.
file_path = 'data.csv'
df = pd.read_csv(file_path)  # use the variable so the path is defined in one place

# 2. Show the basic statistical description of the data.
stat_description = df.describe()
print("Basic Statistical Description:\n", stat_description)

# 3. Check whether the data has null values (number of nulls per column).
null_values = df.isnull().sum()
print("\nNull Values:\n", null_values)

# a. Replace the null values with the mean of their column.
# numeric_only=True restricts the mean to numeric columns, so this step does
# not raise if the CSV contains a non-numeric column; such columns are simply
# left unfilled.
df = df.fillna(df.mean(numeric_only=True))

# 4. Select two columns and aggregate them using: min, max, count, mean.
aggregated_data = df[['Duration', 'Calories']].agg(['min', 'max', 'count', 'mean'])
print("\nAggregated Data:\n", aggregated_data)

# 5. Select the rows with Calories between 500 and 1000 (both bounds inclusive).
filtered_df_500_1000 = df[(df['Calories'] >= 500) & (df['Calories'] <= 1000)]

# 6. Select the rows with Calories > 500 and Pulse < 100 (both strict).
filtered_df_500_pulse_100 = df[(df['Calories'] > 500) & (df['Pulse'] < 100)]

# 7. Create a new "df_modified" dataframe containing every column of df
#    except "Maxpulse". drop() returns a new frame, so df is not changed here.
df_modified = df.drop(columns=['Maxpulse'])

# 8. Delete the "Maxpulse" column from the main df dataframe.
df = df.drop(columns=['Maxpulse'])

# 9. Convert the Calories column to the int datatype.
# NOTE: astype(int) truncates the fractional part (it does not round), and it
# would raise if any NaN were still present -- the mean fill in step 3a is
# what makes this cast safe.
df['Calories'] = df['Calories'].astype(int)
print("\nDataFrame with Calories as int:\n", df)


Basic Statistical Description:
          Duration       Pulse    Maxpulse     Calories
count  169.000000  169.000000  169.000000   164.000000
mean    63.846154  107.461538  134.047337   375.790244
std     42.299949   14.510259   16.450434   266.379919
min     15.000000   80.000000  100.000000    50.300000
25%     45.000000  100.000000  124.000000   250.925000
50%     60.000000  105.000000  131.000000   318.600000
75%     60.000000  111.000000  141.000000   387.600000
max    300.000000  159.000000  184.000000  1860.400000

Null Values:
 Duration    0
Pulse       0
Maxpulse    0
Calories    5
dtype: int64

Aggregated Data:
          Duration     Calories
min     15.000000    50.300000
max    300.000000  1860.400000
count  169.000000   169.000000
mean    63.846154   375.790244

DataFrame with Calories as int:
      Duration  Pulse  Calories
0          60    110       409
1          60    117       479
2          60    103       340
3          45    109       282
4          45    117      