In [None]:
# Start by loading all the libraries required for this work:
import pandas as pd
import numpy as np

# Check whether the data was loaded correctly from the .csv file:
def load_and_check(source=None, *, expected_columns=17, tax_rate=0.05, tolerance=1e-6):
    """Load the sales CSV, annotate it with integrity-check columns and report the result.

    The function reads the CSV, prints whether the column count matches
    ``expected_columns``, adds per-date statistics of ``Total`` and three
    boolean check columns, prints an overall pass/fail message and returns
    the annotated frame.

    Columns read from the input: ``Date``, ``Total``, ``Tax``,
    ``Unit price`` and ``Quantity``.

    Columns added to the returned frame:

    * ``mean``, ``std`` -- per-``Date`` mean and standard deviation of ``Total``.
    * ``threshold`` -- ``3 * std``.
    * ``max`` -- ``mean + threshold``.
    * ``min`` -- ``mean - threshold`` clamped at 0 (0 when undefined).
    * ``Tax_calculated`` -- ``Unit price * Quantity * tax_rate``.
    * ``Condition_1`` -- ``min <= Total <= max``.
    * ``Condition_2`` -- ``Tax + Unit price * Quantity`` matches ``Total``.
    * ``Condition_3`` -- ``Total - Unit price * Quantity`` matches
      ``Tax_calculated``.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). ``None`` selects the original Google Drive download URL.
        expected_columns: Number of columns the raw file is expected to have.
            A mismatch is only reported; processing continues.
        tax_rate: Rate used to compute ``Tax_calculated``.
        tolerance: Absolute tolerance used when comparing monetary amounts
            in ``Condition_2`` and ``Condition_3``.

    Returns:
        pandas.DataFrame: the loaded data with the columns listed above.
    """
    if source is None:
        source = "https://drive.google.com/uc?id=1gf3l8scZG2roG41qTau4_p5UEwUPHM6I&export=download"
    data = pd.read_csv(source)
    if data.shape[1] != expected_columns:
        print('Please check if data was loaded properly, different shape was expected.')
    else:
        print('Data was loaded properly.')

    # Per-date mean and standard deviation of "Total" (SQL-style GROUP BY).
    grouped_data = data.groupby(['Date'])['Total'].agg(['mean', 'std'])

    # Three standard deviations around the mean define the accepted band.
    grouped_data['threshold'] = 3 * grouped_data['std']
    grouped_data['max'] = grouped_data['mean'] + grouped_data['threshold']
    # The lower bound is clamped at 0. fillna(0) keeps the lower bound at 0
    # when std is undefined (e.g. a date with a single row), while staying
    # vectorised and always producing a float column.
    grouped_data['min'] = (
        (grouped_data['mean'] - grouped_data['threshold']).clip(lower=0).fillna(0)
    )

    # Attach the per-date statistics to every row (SQL-style LEFT JOIN).
    data = pd.merge(data, grouped_data, on='Date', how='left')

    # "Tax" contains zeros in the raw data, so recompute it from price and quantity.
    net_amount = data['Unit price'] * data['Quantity']
    data['Tax_calculated'] = net_amount * tax_rate

    # Condition 1: "Total" lies inside [min, max] for its date.
    # Plain assignment is used instead of an in-place fillna on a column
    # selection, which does not reliably write back to the frame.
    within_band = (data['Total'] >= data['min']) & (data['Total'] <= data['max'])
    data['Condition_1'] = within_band.fillna(False)

    # Conditions 2 and 3: the amounts must add up. Floating-point sums are
    # compared with a tolerance, because exact equality flags consistent rows
    # (e.g. 2.148 + 21.48 * 2 vs 45.108) as failures due to rounding.
    data['Condition_2'] = np.isclose(
        data['Tax'] + net_amount, data['Total'], rtol=0.0, atol=tolerance
    )
    data['Condition_3'] = np.isclose(
        data['Total'] - net_amount, data['Tax_calculated'], rtol=0.0, atol=tolerance
    )

    # The pipeline passes only if every row satisfies all three conditions.
    if data['Condition_1'].all() and data['Condition_2'].all() and data['Condition_3'].all():
        print('Data loaded successfully.The pipeline is belissimo.')
    else:
        print("Houston, we have problems! The pipeline integriry check failed.")
    return data
# Run the load-and-validate step and keep the annotated DataFrame under `data`.
data = load_and_check()
# Bare expression: when run as a notebook cell, this displays the DataFrame as the cell output.
data

Data was loaded properly.
Houston, we have problems! The pipeline integriry check failed.


Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax,Total,...,Rating,mean,std,threshold,max,min,Tax_calculated,Condition_1,Condition_2,Condition_3
0,101-17-6199,A,Yangon,Normal,Male,Food and beverages,45.79,7,0.0000,336.5565,...,7.0,206.360700,145.045037,435.135112,641.495812,0,16.0265,True,False,False
1,101-81-4070,C,Naypyitaw,Member,Female,Health and beauty,62.82,2,6.2820,131.9220,...,4.9,285.705000,270.978942,812.936827,1098.641827,0,6.2820,True,True,False
2,102-06-2002,C,Naypyitaw,Member,Male,Sports and travel,25.25,5,0.0000,132.5625,...,6.1,363.880300,265.863137,797.589410,1161.469710,0,6.3125,True,False,True
3,102-77-2261,C,Naypyitaw,Member,Male,Health and beauty,65.31,7,0.0000,480.0285,...,4.2,366.522265,250.589063,751.767190,1118.289455,0,22.8585,True,False,False
4,105-10-6182,A,Yangon,Member,Male,Fashion accessories,21.48,2,2.1480,45.1080,...,6.6,418.532250,274.813848,824.441545,1242.973795,0,2.1480,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,894-41-5205,C,Naypyitaw,Normal,Female,Food and beverages,43.18,8,17.2720,362.7120,...,8.3,307.170281,192.983717,578.951150,886.121431,0,17.2720,True,True,False
996,895-03-6665,B,Mandalay,Normal,Female,Fashion accessories,36.51,9,16.4295,345.0195,...,4.2,312.970875,223.555621,670.666864,983.637739,0,16.4295,True,True,False
997,895-66-0685,B,Mandalay,Member,Male,Food and beverages,18.08,3,0.0000,56.9520,...,8.0,366.522265,250.589063,751.767190,1118.289455,0,2.7120,True,False,False
998,896-34-0956,A,Yangon,Normal,Male,Fashion accessories,21.32,1,1.0660,22.3860,...,5.9,262.206618,203.917422,611.752266,873.958883,0,1.0660,True,True,False
