In [1]:
import pandas as pd

# Read data.csv into the main dataframe
dataSet = pd.read_csv(filepath_or_buffer='data.csv')

# Summary statistics per column: count, mean, std, min, quartiles and max
print(dataSet.describe())


         Duration       Pulse    Maxpulse     Calories
count  169.000000  169.000000  169.000000   164.000000
mean    63.846154  107.461538  134.047337   375.790244
std     42.299949   14.510259   16.450434   266.379919
min     15.000000   80.000000  100.000000    50.300000
25%     45.000000  100.000000  124.000000   250.925000
50%     60.000000  105.000000  131.000000   318.600000
75%     60.000000  111.000000  141.000000   387.600000
max    300.000000  159.000000  184.000000  1860.400000


In [None]:
# Per-column flag: True when the column holds at least one missing value
dataSet.isna().any(axis=0)

Duration    False
Pulse       False
Maxpulse    False
Calories     True
dtype: bool

In [None]:
# Impute every missing value with the mean of its own column.
# The result is assigned back instead of using inplace=True, which keeps the
# step chainable; numeric_only=True stops mean() from raising if a
# non-numeric column is ever present in the file.
dataSet = dataSet.fillna(dataSet.mean(numeric_only=True))

# Re-check each column for remaining missing values
dataSet.isnull().any()

Duration    False
Pulse       False
Maxpulse    False
Calories    False
dtype: bool

In [None]:
# Summarise Duration and Calories by minimum, maximum, non-null count and mean
agg_dataSet = dataSet.loc[:, ['Duration', 'Calories']].aggregate(['min', 'max', 'count', 'mean'])
print(agg_dataSet)

         Duration     Calories
min     15.000000    50.300000
max    300.000000  1860.400000
count  169.000000   169.000000
mean    63.846154   375.790244


In [None]:
# Keep only the rows whose Calories value lies in [500, 1000], both ends included
filtered_dataSet = dataSet.loc[dataSet['Calories'].between(500, 1000)]
print(filtered_dataSet)

     Duration  Pulse  Maxpulse  Calories
51         80    123       146     643.1
62        160    109       135     853.0
65        180     90       130     800.4
66        150    105       135     873.4
67        150    107       130     816.0
72         90    100       127     700.0
73        150     97       127     953.2
75         90     98       125     563.2
78        120    100       130     500.4
83        120    100       130     500.0
90        180    101       127     600.1
99         90     93       124     604.1
101        90     90       110     500.0
102        90     90       100     500.0
103        90     90       100     500.4
106       180     90       120     800.3
108        90     90       120     500.3


In [None]:
# Keep only the rows with Calories strictly above 500 and Pulse strictly below 100
filtered_dataSet2 = dataSet.loc[dataSet['Calories'].gt(500) & dataSet['Pulse'].lt(100)]
print(filtered_dataSet2)

     Duration  Pulse  Maxpulse  Calories
65        180     90       130     800.4
70        150     97       129    1115.0
73        150     97       127     953.2
75         90     98       125     563.2
99         90     93       124     604.1
103        90     90       100     500.4
106       180     90       120     800.3
108        90     90       120     500.3


In [None]:
# Build dataSet_modified: a new dataframe equal to dataSet minus the Maxpulse
# column (dataSet itself is not changed by this call)
dataSet_modified = dataSet.drop('Maxpulse', axis='columns')

# Show the new dataframe
print(dataSet_modified)

     Duration  Pulse  Calories
0          60    110     409.1
1          60    117     479.0
2          60    103     340.0
3          45    109     282.4
4          45    117     406.0
..        ...    ...       ...
164        60    105     290.8
165        60    110     300.0
166        60    115     310.2
167        75    120     320.4
168        75    125     330.4

[169 rows x 3 columns]


In [None]:
# Remove the Maxpulse column from the main dataframe.
# The result is assigned back rather than dropped with inplace=True, and
# errors='ignore' makes the cell safe to re-run: once the column is gone a
# second run is a no-op instead of raising KeyError.
dataSet = dataSet.drop(columns=['Maxpulse'], errors='ignore')
print(dataSet)

     Duration  Pulse  Calories
0          60    110     409.1
1          60    117     479.0
2          60    103     340.0
3          45    109     282.4
4          45    117     406.0
..        ...    ...       ...
164        60    105     290.8
165        60    110     300.0
166        60    115     310.2
167        75    120     320.4
168        75    125     330.4

[169 rows x 3 columns]


In [None]:
# Convert the Calories column to a 64-bit integer dtype.
# 'int64' is spelled out because plain `int` resolves to a 32-bit integer on
# Windows with NumPy < 2, which would not match the dtype shown below.
# Note: the cast discards the fractional part (truncates toward zero); it
# does not round.
dataSet['Calories'] = dataSet['Calories'].astype('int64')
dataSet.dtypes

Duration    int64
Pulse       int64
Calories    int64
dtype: object