In [1]:
#Using python 3.8.5
#!pip install flatten-json
#!pip install pytest
#!pip install ipytest

import warnings

warnings.filterwarnings("ignore") #just to keep clean
import pandas as pd
from flatten_json import flatten
import requests
import pytest
import ipytest

# Apply ipytest's default configuration up front; the tests themselves are
# executed later in the notebook via ipytest.run().
ipytest.autoconfig()

#getting JSON data and flattening
def get_clp_places(URL, timeout=30):
    """Download the places JSON from ``URL`` and return it as a flat DataFrame.

    Every record of the returned JSON array is flattened with
    ``flatten_json.flatten`` using "." as separator, so nested keys turn
    into dotted column names (e.g. ``ensign.id``).

    Parameters
    ----------
    URL : str
        Endpoint that returns a JSON array of place records.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` may block forever on a stalled server.

    Returns
    -------
    pandas.DataFrame
        One row per record, one column per flattened key.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts or a non-2xx HTTP status.
    ValueError
        If the response body cannot be decoded as JSON.
    """
    # Errors are deliberately NOT caught here: the real cause (network, HTTP
    # status, bad JSON) is far more useful to the reader of the notebook than
    # a generic placeholder exception.
    response = requests.get(URL, timeout=timeout)   # API call
    response.raise_for_status()                     # fail loudly on 4xx/5xx instead of parsing an error body
    records = response.json()
    flat_records = [flatten(record, ".") for record in records]  # nested dicts -> flat dicts
    return pd.DataFrame(flat_records)               # flat dicts -> DataFrame

# Fetch the CLP places list from the Colruyt Group places endpoint and
# preview the first 10 rows of the flattened result.
df_clp = get_clp_places("https://ecgplacesmw.colruytgroup.com/ecgplacesmw/v3/nl/places/filter/clp-places")
df_clp.head(10)

Unnamed: 0,placeId,ensign.id,ensign.name,commercialName,branchId,sourceStatus,placeType.id,placeType.longName,placeType.placeTypeDescription,sellingPartners.0,...,placeSearchOpeningHours.0.date,placeSearchOpeningHours.0.opens,placeSearchOpeningHours.0.closes,placeSearchOpeningHours.0.isToday,placeSearchOpeningHours.0.isOpenForTheDay,placeSearchOpeningHours.1.date,placeSearchOpeningHours.1.opens,placeSearchOpeningHours.1.closes,placeSearchOpeningHours.1.isToday,placeSearchOpeningHours.1.isOpenForTheDay
0,902,8,COLR_Colruyt,AALST (COLRUYT),4156,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
1,946,8,COLR_Colruyt,AALTER (COLRUYT),4218,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
2,950,8,COLR_Colruyt,AARSCHOT (COLRUYT),4222,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
3,886,8,COLR_Colruyt,ALSEMBERG (COLRUYT),4138,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
4,783,8,COLR_Colruyt,AMAY (COLRUYT),3853,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
5,650,8,COLR_Colruyt,ANDENNE (COLRUYT),3596,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
6,669,8,COLR_Colruyt,ANDERLECHT (HERBETTELN) COLR,3620,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
7,744,8,COLR_Colruyt,ANDERLECHT (VEEWEYDE) COLRUYT,3759,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
8,448,8,COLR_Colruyt,ANDERLUES (COLRUYT),3074,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True
9,681,8,COLR_Colruyt,ANS (COLRUYT),3644,AC,1,Winkel,Winkel,QUALITY,...,22-12-2022,830,2000,True,True,23-12-2022,800,2100,False,True


In [2]:
df_clp.info() # list every column with its non-null count and dtype

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 257 entries, 0 to 256
Data columns (total 33 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   placeId                                    257 non-null    int64  
 1   ensign.id                                  257 non-null    int64  
 2   ensign.name                                257 non-null    object 
 3   commercialName                             257 non-null    object 
 4   branchId                                   257 non-null    object 
 5   sourceStatus                               257 non-null    object 
 6   placeType.id                               257 non-null    int64  
 7   placeType.longName                         257 non-null    object 
 8   placeType.placeTypeDescription             257 non-null    object 
 9   sellingPartners.0                          251 non-null    object 
 10  sellingPartners.1         

In [3]:
#Quality Checks

#Checking the total records if > 200
def test_record_check():
    assert len(df_clp) > 200
    
#Checking latitude max and min values are within bounds
def test_min_max_latitude():
    assert 49 < df_clp['geoCoordinates.latitude'].min()
    assert 52 > df_clp['geoCoordinates.latitude'].max()
    
#Checking longitude max and min values are within bounds
def test_min_max_longitude():
    assert 2 < df_clp['geoCoordinates.longitude'].min()
    assert 7 > df_clp['geoCoordinates.longitude'].max()


In [4]:
# Feature creation - 'antwerpen'

def check_name(x):
    """Return 1 if ``x`` is a string containing "antwerpen", else 0.

    The match is case-insensitive and looks for the substring anywhere in
    ``x``. Non-string values (e.g. ``None`` or the NaN that pandas uses for a
    missing value) yield 0 instead of raising ``AttributeError`` on
    ``.lower()``.
    """
    if not isinstance(x, str):
        return 0
    return int("antwerpen" in x.lower())
    
# Add a 0/1 "antwerpen" flag column derived from the city name of each place.
# Note: this adds the column to df_clp in place.
df_clp["antwerpen"]=df_clp['address.cityName'].apply(check_name)


#Checking if the column existis in the DataFrame.
def test_check_column():
    assert {"antwerpen"}.issubset(df_clp.columns)
    
#Checking if both 0 and 1 exists in column.
def test_values_in_cols():
    assert df_clp["antwerpen"].unique().tolist() == [0,1]

# Running all tests defined so far in the notebook
# (the data quality checks and the 'antwerpen' feature checks).
ipytest.run()

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                        [100%][0m
[32m[32m[1m5 passed[0m[32m in 0.27s[0m[0m


<ExitCode.OK: 0>

In [5]:
# Count how many places are flagged as Antwerpen (1) versus not (0).
df_clp["antwerpen"].value_counts()

0    251
1      6
Name: antwerpen, dtype: int64