Perform the following operations using Python on the Telecom_Churn
dataset. Compute and display summary statistics for each feature available
in the dataset, using a separate command for each statistic (e.g., minimum
value, maximum value, mean, range, standard deviation, variance, and
percentiles).

In [1]:
import pandas as pd
import numpy as np

# Lift the row cap on printed output so long results are never truncated
pd.options.display.max_rows = None

In [2]:
# Read the churn data into a DataFrame.
# 'Telecom Churn.csv' is resolved relative to the current working directory.
df = pd.read_csv('Telecom Churn.csv')

# Sanity check: preview the first five records
print("Dataset Head:")
print(df.head(5))

# Keep only numeric dtypes so every statistic computed later is defined;
# text columns such as 'state' and 'phone number' are dropped by this filter.
# NOTE(review): 'area code' is stored as a number and therefore kept, even
# though it looks like an identifier rather than a measurement -- confirm
# whether it should take part in the statistics.
numeric_df = df.select_dtypes(include='number')
print("\nNumerical Columns selected for analysis:")
print(list(numeric_df.columns))

Dataset Head:
  state  account length  area code phone number international plan  \
0    KS             128        415     382-4657                 no   
1    OH             107        415     371-7191                 no   
2    NJ             137        415     358-1921                 no   
3    OH              84        408     375-9999                yes   
4    OK              75        415     330-6626                yes   

  voice mail plan  number vmail messages  total day minutes  total day calls  \
0             yes                     25              265.1              110   
1             yes                     26              161.6              123   
2              no                      0              243.4              114   
3              no                      0              299.4               71   
4              no                      0              166.7              113   

   total day charge  ...  total eve calls  total eve charge  \
0             45.07  

In [3]:
# Column-wise minimum: the smallest observed value of each numeric feature
min_vals = numeric_df.min(axis=0)

print("--- Minimum Values ---", min_vals, sep="\n")

--- Minimum Values ---
account length              1.00
area code                 408.00
number vmail messages       0.00
total day minutes           0.00
total day calls             0.00
total day charge            0.00
total eve minutes           0.00
total eve calls             0.00
total eve charge            0.00
total night minutes        23.20
total night calls          33.00
total night charge          1.04
total intl minutes          0.00
total intl calls            0.00
total intl charge           0.00
customer service calls      0.00
dtype: float64


In [4]:
# Column-wise maximum: the largest observed value of each numeric feature
max_vals = numeric_df.max(axis=0)

print("--- Maximum Values ---", max_vals, sep="\n")

--- Maximum Values ---
account length            243.00
area code                 510.00
number vmail messages      51.00
total day minutes         350.80
total day calls           165.00
total day charge           59.64
total eve minutes         363.70
total eve calls           170.00
total eve charge           30.91
total night minutes       395.00
total night calls         175.00
total night charge         17.77
total intl minutes         20.00
total intl calls           20.00
total intl charge           5.40
customer service calls      9.00
dtype: float64


In [5]:
# Column-wise arithmetic mean (average) of each numeric feature
mean_vals = numeric_df.mean(axis=0)

print("--- Mean Values ---", mean_vals, sep="\n")

--- Mean Values ---
account length            101.064806
area code                 437.182418
number vmail messages       8.099010
total day minutes         179.775098
total day calls           100.435644
total day charge           30.562307
total eve minutes         200.980348
total eve calls           100.114311
total eve charge           17.083540
total night minutes       200.872037
total night calls         100.107711
total night charge          9.039325
total intl minutes         10.237294
total intl calls            4.479448
total intl charge           2.764581
customer service calls      1.562856
dtype: float64


In [6]:
# Range of each numeric feature = maximum minus minimum.
# Reuses `max_vals` and `min_vals`, so the cells that compute them must
# have been executed before this one.
range_vals = max_vals.sub(min_vals)

print("--- Range (Max - Min) ---", range_vals, sep="\n")

--- Range (Max - Min) ---
account length            242.00
area code                 102.00
number vmail messages      51.00
total day minutes         350.80
total day calls           165.00
total day charge           59.64
total eve minutes         363.70
total eve calls           170.00
total eve charge           30.91
total night minutes       371.80
total night calls         142.00
total night charge         16.73
total intl minutes         20.00
total intl calls           20.00
total intl charge           5.40
customer service calls      9.00
dtype: float64


In [7]:
# Standard deviation of each numeric feature.
# ddof=1 (the pandas default, spelled out here) divides by N - 1,
# i.e. this is the sample standard deviation.
std_vals = numeric_df.std(ddof=1)

print("--- Standard Deviation ---", std_vals, sep="\n")

--- Standard Deviation ---
account length            39.822106
area code                 42.371290
number vmail messages     13.688365
total day minutes         54.467389
total day calls           20.069084
total day charge           9.259435
total eve minutes         50.713844
total eve calls           19.922625
total eve charge           4.310668
total night minutes       50.573847
total night calls         19.568609
total night charge         2.275873
total intl minutes         2.791840
total intl calls           2.461214
total intl charge          0.753773
customer service calls     1.315491
dtype: float64


In [8]:
# Variance of each numeric feature.
# ddof=1 (the pandas default, spelled out here) divides by N - 1,
# i.e. this is the sample variance.
var_vals = numeric_df.var(ddof=1)

print("--- Variance ---", var_vals, sep="\n")

--- Variance ---
account length            1585.800121
area code                 1795.326257
number vmail messages      187.371347
total day minutes         2966.696487
total day calls            402.768141
total day charge            85.737128
total eve minutes         2571.894016
total eve calls            396.910999
total eve charge            18.581856
total night minutes       2557.714002
total night calls          382.930472
total night charge           5.179597
total intl minutes           7.794368
total intl calls             6.057576
total intl charge            0.568173
customer service calls       1.730517
dtype: float64


In [9]:
# Calculate Percentiles (25%, 50%, 75%) for each numerical feature
# 25th Percentile
p25 = numeric_df.quantile(0.25)

# 50th Percentile (Median)
p50 = numeric_df.quantile(0.50)

# 75th Percentile
p75 = numeric_df.quantile(0.75)

print("--- 25th Percentiles ---")
print(p25)
print("\n--- 50th Percentiles (Median) ---")
print(p50)
print("\n--- 75th Percentiles ---")
print(p75)

--- 25th Percentiles ---
account length             74.00
area code                 408.00
number vmail messages       0.00
total day minutes         143.70
total day calls            87.00
total day charge           24.43
total eve minutes         166.60
total eve calls            87.00
total eve charge           14.16
total night minutes       167.00
total night calls          87.00
total night charge          7.52
total intl minutes          8.50
total intl calls            3.00
total intl charge           2.30
customer service calls      1.00
Name: 0.25, dtype: float64

--- 50th Percentiles (Median) ---
account length            101.00
area code                 415.00
number vmail messages       0.00
total day minutes         179.40
total day calls           101.00
total day charge           30.50
total eve minutes         201.40
total eve calls           100.00
total eve charge           17.12
total night minutes       201.20
total night calls         100.00
total night charge    