In [268]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

**Reading the Data**

In [269]:
# Load the beer-servings CSV into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute path (looks like a Colab upload location — confirm);
# the notebook only runs where the file exists at exactly this location.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably an index that was
# written into the CSV — consider index_col=0 once nothing downstream depends on it.
data = pd.read_csv('/content/beer-servings.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,1,Albania,89.0,132.0,54.0,4.9,Europe
2,2,Algeria,25.0,0.0,14.0,0.7,Africa
3,3,Andorra,245.0,138.0,312.0,12.4,Europe
4,4,Angola,217.0,57.0,45.0,5.9,Africa


**Handling Null Data (Median Imputation)**

In [270]:
# Replace missing beer_servings values with the column median,
# then display how many nulls are left in that column.
beer_median = data['beer_servings'].median()
data['beer_servings'] = data['beer_servings'].fillna(beer_median)
data['beer_servings'].isna().sum()

0

In [271]:
# Impute the other numeric columns the same way: each column's gaps are
# filled with that column's own median.
for column in ('spirit_servings', 'wine_servings', 'total_litres_of_pure_alcohol'):
    column_median = data[column].median()
    data[column] = data[column].fillna(column_median)

In [272]:
# Per-column count of missing values remaining after imputation.
data.isna().sum()

Unnamed: 0                      0
country                         0
beer_servings                   0
spirit_servings                 0
wine_servings                   0
total_litres_of_pure_alcohol    0
continent                       0
dtype: int64

**Finding Outliers** (IQR rule: a value is an outlier if it lies below Q1 − 1.5·IQR or above Q3 + 1.5·IQR)

1. Beer Servings

In [273]:
# Quartiles (25th, 50th, 75th percentiles) of beer servings, using the midpoint
# rule when a quantile falls between two observations.
# Series.quantile is used instead of np.percentile(..., interpolation=...):
# NumPy deprecated that keyword in 1.22 in favour of `method`, whereas the
# pandas keyword is stable, and a single call computes all three quartiles.
Q1_b, Q2_b, Q3_b = data['beer_servings'].quantile(
    [0.25, 0.5, 0.75], interpolation='midpoint'
)

In [274]:
# Interquartile range of beer servings: distance between the 75th and 25th percentiles.
IQR_b  = Q3_b - Q1_b
print(IQR_b)

150.0


In [275]:
# Outlier fences for beer servings: 1.5 IQRs below Q1 and above Q3.
fence_b = 1.5 * IQR_b
low_lim_b = Q1_b - fence_b
up_lim_b = Q3_b + fence_b

In [276]:
# Show the lower and upper beer-servings fences, one per line.
print(low_lim_b, up_lim_b, sep='\n')

-204.0
396.0


In [277]:
# Collect, in row order, every beer-servings value strictly outside the fences.
beer = data['beer_servings']
outlier_b = beer[(beer > up_lim_b) | (beer < low_lim_b)].tolist()

In [278]:
# Print a label, then let the cell's last expression display the collected list.
print('Outliers in beer servings data are:')
outlier_b

Outliers in beer servings data are:


[]

2. Spirit Servings

In [279]:
# Quartiles (25th, 50th, 75th percentiles) of spirit servings, using the
# midpoint rule when a quantile falls between two observations.
# Series.quantile is used instead of np.percentile(..., interpolation=...):
# NumPy deprecated that keyword in 1.22 in favour of `method`, whereas the
# pandas keyword is stable, and a single call computes all three quartiles.
# The unsuffixed names (Q1/Q2/Q3) are kept because other cells read them.
Q1, Q2, Q3 = data['spirit_servings'].quantile(
    [0.25, 0.5, 0.75], interpolation='midpoint'
)

In [280]:
# Interquartile range of spirit servings: distance between the 75th and 25th percentiles.
IQR  = Q3 - Q1
print(IQR)

123.0


In [281]:
# Outlier fences for spirit servings: 1.5 IQRs below Q1 and above Q3.
fence = 1.5 * IQR
low_lim = Q1 - fence
up_lim = Q3 + fence


In [282]:
# Show the lower and upper spirit-servings fences, one per line.
print(low_lim, up_lim, sep='\n')

-179.5
312.5


In [283]:
# Collect, in row order, every spirit-servings value strictly outside the fences.
spirit = data['spirit_servings']
outlier = spirit[(spirit > up_lim) | (spirit < low_lim)].tolist()

In [284]:
# Print a label, then let the cell's last expression display the collected list.
print('Outliers in spirit servings data are:')
outlier

Outliers in spirit servings data are:


[373.0, 438.0, 326.0, 326.0, 315.0]

3. Wine Servings

In [285]:
# Quartiles (25th, 50th, 75th percentiles) of wine servings, using the midpoint
# rule when a quantile falls between two observations.
# Series.quantile is used instead of np.percentile(..., interpolation=...):
# NumPy deprecated that keyword in 1.22 in favour of `method`, whereas the
# pandas keyword is stable, and a single call computes all three quartiles.
Q1_w, Q2_w, Q3_w = data['wine_servings'].quantile(
    [0.25, 0.5, 0.75], interpolation='midpoint'
)

In [286]:
# Interquartile range of wine servings: distance between the 75th and 25th percentiles.
IQR_w  = Q3_w - Q1_w
print(IQR_w)

58.0


In [287]:
# Outlier fences for wine servings: 1.5 IQRs below Q1 and above Q3.
fence_w = 1.5 * IQR_w
low_lim_w = Q1_w - fence_w
up_lim_w = Q3_w + fence_w


In [288]:
# Show the lower and upper wine-servings fences, one per line.
print(low_lim_w, up_lim_w, sep='\n')

-86.0
146.0


In [289]:
# Collect, in row order, every wine-servings value strictly outside the fences.
wine = data['wine_servings']
outlier_w = wine[(wine > up_lim_w) | (wine < low_lim_w)].tolist()

In [290]:
# Print a label, then let the cell's last expression display the collected list.
print('Outliers in wine servings data are:')
outlier_w

Outliers in wine servings data are:


[312.0,
 221.0,
 212.0,
 191.0,
 212.0,
 172.0,
 254.0,
 278.0,
 233.0,
 370.0,
 149.0,
 175.0,
 218.0,
 185.0,
 165.0,
 237.0,
 271.0,
 190.0,
 175.0,
 339.0,
 167.0,
 276.0,
 186.0,
 280.0,
 195.0,
 220.0]

4. Total litres of pure alcohol

In [291]:
# Quartiles (25th, 50th, 75th percentiles) of total litres of pure alcohol,
# using the midpoint rule when a quantile falls between two observations.
# Series.quantile is used instead of np.percentile(..., interpolation=...):
# NumPy deprecated that keyword in 1.22 in favour of `method`, whereas the
# pandas keyword is stable, and a single call computes all three quartiles.
Q1_t, Q2_t, Q3_t = data['total_litres_of_pure_alcohol'].quantile(
    [0.25, 0.5, 0.75], interpolation='midpoint'
)

In [292]:
# Interquartile range of total pure alcohol: distance between the 75th and 25th percentiles.
IQR_t  = Q3_t - Q1_t
print(IQR_t)

5.9


In [293]:
# Outlier fences for total pure alcohol: 1.5 IQRs below Q1 and above Q3.
fence_t = 1.5 * IQR_t
low_lim_t = Q1_t - fence_t
up_lim_t = Q3_t + fence_t

In [294]:
# Show the lower and upper total-alcohol fences, one per line.
print(low_lim_t, up_lim_t, sep='\n')

-7.550000000000002
16.05


In [295]:
# Collect, in row order, every total-alcohol value strictly outside the fences.
total_alcohol = data['total_litres_of_pure_alcohol']
outlier_t = total_alcohol[(total_alcohol > up_lim_t) | (total_alcohol < low_lim_t)].tolist()

In [296]:
# Print a label, then let the cell's last expression display the collected list.
print('Outliers in total litres of pure alcohol data are:')
outlier_t

Outliers in total litres of pure alcohol data are:


[]