In [6]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import tabula
from sqlalchemy import inspect

from database_utils import DatabaseConnector

In [7]:
def list_number_of_stores(number_of_stores_endpoint, headers, timeout=30):
    """Fetch the store-count payload from the API.

    Args:
        number_of_stores_endpoint: URL of the endpoint to query.
        headers: HTTP headers sent with the request (e.g. the API key).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response (presumably a dict; the caller
        reads the ``number_stores`` key from it), or ``None`` if the request
        failed or the body was not valid JSON. Failures are printed, not raised.
    """
    try:
        response = requests.get(
            number_of_stores_endpoint, headers=headers, timeout=timeout
        )
        response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)
        return response.json()
    # ValueError covers JSON decoding failures on requests versions where the
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error listing number of stores: {e}")
        return None
    
def retrieve_stores_data(store_endpoint_pattern, headers, total_stores, timeout=30):
    """Download the details of every store and return them as one DataFrame.

    Stores are requested one at a time from
    ``{store_endpoint_pattern}/{store_number}`` for ``store_number`` in
    ``0 .. total_stores - 1``.

    Args:
        store_endpoint_pattern: Base URL; the store number is appended to it.
        headers: HTTP headers sent with every request (e.g. the API key).
        total_stores: Number of stores to fetch.
        timeout: Seconds to wait for each response before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A DataFrame with one row per store, indexed by the payload's ``index``
        field; an empty DataFrame when ``total_stores`` is 0; or ``None`` as
        soon as any single request fails (the error is printed, and rows
        fetched so far are discarded).
    """
    all_stores_data = []

    for store_number in range(total_stores):
        store_endpoint = f"{store_endpoint_pattern}/{store_number}"
        try:
            response = requests.get(store_endpoint, headers=headers, timeout=timeout)
            response.raise_for_status()
            all_stores_data.append(response.json())
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving data for store {store_number}: {e}")
            return None

    all_stores_df = pd.DataFrame(all_stores_data)
    # With nothing fetched there is no 'index' column, so set_index would raise
    # KeyError; hand back the empty frame instead.
    if all_stores_df.empty:
        return all_stores_df
    return all_stores_df.set_index('index')

# API details
number_of_stores_endpoint = "https://aqj7u5id95.execute-api.eu-west-1.amazonaws.com/prod/number_stores/"
store_endpoint_pattern = "https://aqj7u5id95.execute-api.eu-west-1.amazonaws.com/prod/store_details"
# FIXME(security): the literal key below is committed with the notebook. Set the
# STORES_API_KEY environment variable to override it; the fallback is kept only
# so existing runs keep working and should be removed once the key is rotated.
headers = {
    "x-api-key": os.environ.get(
        "STORES_API_KEY", "yFBQbwXe9J3sd6zWVAMrK6lcxxr0q1lr2PT6DDMX"
    )
}

number_of_stores = list_number_of_stores(number_of_stores_endpoint, headers)
# list_number_of_stores returns None on failure; stop here with a clear message
# instead of an AttributeError on `.get` below.
if number_of_stores is None:
    raise RuntimeError("Could not retrieve the number of stores; see the error printed above.")
total_stores = number_of_stores.get('number_stores', 0)

all_store_data = retrieve_stores_data(store_endpoint_pattern, headers, total_stores)
# retrieve_stores_data returns None when any store request fails; fail in this
# cell rather than in whichever later cell first touches the frame.
if all_store_data is None:
    raise RuntimeError("Could not retrieve the store details; see the error printed above.")

In [8]:
# Preview the first rows of the store table built from the API responses.
all_store_data.head()

Unnamed: 0_level_0,address,longitude,lat,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,,,,,WEB-1388012W,325,2010-06-12,Web Portal,,GB,Europe
1,"Flat 72W\nSally isle\nEast Deantown\nE7B 8EB, ...",51.62907,,High Wycombe,HI-9B97EE4E,34,1996-10-25,Local,-0.74934,GB,Europe
2,"Heckerstraße 4/5\n50491 Säckingen, Landshut",48.52961,,Landshut,LA-0772C7B9,92,2013-04-12,Super Store,12.16179,DE,Europe
3,"5 Harrison tunnel\nSouth Lydia\nWC9 2BE, Westbury",51.26,,Westbury,WE-1DE82CEE,69,2014-01-02,Super Store,-2.1875,GB,Europe
4,Studio 6\nStephen landing\nSouth Simon\nB77 2W...,53.0233,,Belper,BE-18074576,35,2019-09-09,Local,-1.48119,GB,Europe


In [9]:
# Summary statistics for every column; include="all" also covers the
# non-numeric (object) columns, giving count/unique/top/freq for them.
all_store_data.describe(include="all")

Unnamed: 0,address,longitude,lat,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
count,451.0,451.0,11.0,451,451.0,451,451,451,450.0,451,451
unique,449.0,125.0,9.0,125,449.0,114,440,13,124.0,11,12
top,,53.46506,,Chapletown,,20,2004-01-23,Local,-1.47217,GB,Europe
freq,3.0,14.0,3.0,14,3.0,18,3,255,14.0,266,383


In [12]:
# List the distinct values held in the `lat` column.
all_store_data["lat"].unique()


array(['N/A', None, '13KJZ890JH', '2XE1OWOC23', 'NULL', 'OXVE5QR07O',
       'VKA5I8H32X', 'LACCWDI0SB', 'A3O5CBWAMD', 'UXMWDMX1LC'],
      dtype=object)

In [13]:
# Drop the `lat` column. Using drop(..., errors="ignore") instead of `del`
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises KeyError.
all_store_data = all_store_data.drop(columns="lat", errors="ignore")

In [14]:
# Preview the first rows again now that the `lat` column has been removed.
all_store_data.head()

Unnamed: 0_level_0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,,,,WEB-1388012W,325,2010-06-12,Web Portal,,GB,Europe
1,"Flat 72W\nSally isle\nEast Deantown\nE7B 8EB, ...",51.62907,High Wycombe,HI-9B97EE4E,34,1996-10-25,Local,-0.74934,GB,Europe
2,"Heckerstraße 4/5\n50491 Säckingen, Landshut",48.52961,Landshut,LA-0772C7B9,92,2013-04-12,Super Store,12.16179,DE,Europe
3,"5 Harrison tunnel\nSouth Lydia\nWC9 2BE, Westbury",51.26,Westbury,WE-1DE82CEE,69,2014-01-02,Super Store,-2.1875,GB,Europe
4,Studio 6\nStephen landing\nSouth Simon\nB77 2W...,53.0233,Belper,BE-18074576,35,2019-09-09,Local,-1.48119,GB,Europe


In [15]:
# Re-run the all-column summary on the frame without the `lat` column.
all_store_data.describe(include="all")

Unnamed: 0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
count,451.0,451.0,451,451.0,451,451,451,450.0,451,451
unique,449.0,125.0,125,449.0,114,440,13,124.0,11,12
top,,53.46506,Chapletown,,20,2004-01-23,Local,-1.47217,GB,Europe
freq,3.0,14.0,14,3.0,18,3,255,14.0,266,383


In [16]:
# List the distinct values held in the `country_code` column.
all_store_data["country_code"].unique()

array(['GB', 'DE', 'US', 'YELVM536YT', 'FP8DLXQVGH', 'NULL', 'HMHIFNLOBN',
       'F3AO8V2LHU', 'OH20I92LX3', 'OYVW925ZL8', 'B3EH2ZGQAV'],
      dtype=object)

In [17]:
# Show every row whose country code is something other than GB, DE or US.
all_store_data.loc[~all_store_data["country_code"].isin(["GB", "DE", "US"])]

Unnamed: 0_level_0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
63,6FWDZHD7PW,1ZVU03X2P6,9IBH8Y4Z0S,NRQKZWJ9OZ,BIP8K8JJW2,ZCXWWKF45G,QP74AHEQT0,1CJ5OAU4BR,YELVM536YT,QMAVR5H3LD
172,UBCIFQLSNY,Q1TJY8H1ZH,1T6B406CI8,QIUU9SVP51,SKBXAXF5G5,7AHXLXIUEF,O0QJIRC943,3ZR3F89D97,FP8DLXQVGH,LU3E036ZD9
217,,,,,,,,,,
231,RC99UKMZB2,2YBZ1440V6,6LVWPU1G64,Y8J0Z2W8O9,2429OB3LMM,0OLAK2I6NS,50IB01SFAZ,L13EQEQODP,HMHIFNLOBN,5586JCLARW
333,X349GIDWKU,O7NF1FZ74Y,RX9TCP2RGB,ISEE8A57FE,74BY7HSB6P,A3PMVM800J,0RSNUU3DF5,J3BPB68Z1J,F3AO8V2LHU,GFJQ2AAEQ8
381,ZBGB54ID4H,SKO4NMRNNF,CQMHKI78BX,T0R2CQBDUS,GT1FO6YGD4,GMMB02LA9V,B4KVQB3P5Y,AJHOMDOHZ4,OH20I92LX3,SLQBD982C0
405,,,,,,,,,,
414,XTUAV57DP4,ID819KG3X5,RY6K0AUE7F,TUOKF5HAAQ,FRTGHAA34B,13PIY8GD1H,X0FE7E2EOG,AE7EEW4HSS,OYVW925ZL8,XQ953VS0FG
437,,,,,,,,,,
447,K0ODETRLS3,K8CXLZDP07,3VHFDNP8ET,9D4LK7X4LZ,D23PCWSM6S,36IIMAQD58,NN04B3F6UQ,JZP8MIJTPZ,B3EH2ZGQAV,1WZB1TE1HL


In [18]:
keep_values = ["GB", "DE", "US"]

# Keep only the rows whose 'country_code' is one of the valid values.
# The explicit .copy() makes the result an independent frame, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
all_store_data = all_store_data[all_store_data['country_code'].isin(keep_values)].copy()

In [19]:
# Sanity check: after the filter, no row should have a country code outside
# GB, DE and US, so this selection is expected to be empty.
all_store_data.loc[~all_store_data["country_code"].isin(["GB", "DE", "US"])]

Unnamed: 0_level_0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [20]:
# All-column summary of the frame after restricting it to valid country codes.
all_store_data.describe(include="all")

Unnamed: 0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
count,441.0,441.0,441,441,441,441,441,440.0,441,441
unique,441.0,117.0,117,441,106,432,5,116.0,3,4
top,,53.46506,Chapletown,WEB-1388012W,20,2004-01-23,Local,-1.47217,GB,Europe
freq,1.0,14.0,14,1,18,3,255,14.0,266,383


In [21]:
# List the distinct values held in the `continent` column.
all_store_data["continent"].unique()

array(['Europe', 'America', 'eeEurope', 'eeAmerica'], dtype=object)

In [22]:
# Normalise the two misspelled continent labels. Series.replace with a mapping
# swaps whole cell values only (no substring or regex matching), and assign()
# builds a new frame rather than writing into a possibly-filtered one, which
# avoids SettingWithCopyWarning and keeps the cell safe to re-run.
continent_fixes = {"eeEurope": "Europe", "eeAmerica": "America"}
all_store_data = all_store_data.assign(
    continent=all_store_data["continent"].replace(continent_fixes)
)
all_store_data["continent"].unique()

array(['Europe', 'America'], dtype=object)

In [23]:
# List the distinct values held in the `store_code` column.
all_store_data["store_code"].unique()

array(['WEB-1388012W', 'HI-9B97EE4E', 'LA-0772C7B9', 'WE-1DE82CEE',
       'BE-18074576', 'GA-CAD01AC2', 'RU-C603E990', 'ST-229D997E',
       'KA-FA7ED3B8', 'HA-974352FE', 'RU-9F1136B4', 'SI-ECD52CD9',
       'DE-585399CF', 'CR-792AA8BB', 'HA-39A446E2', 'LA-9B0D9277',
       'NE-1D8B1D0C', 'LY-4C3D5D6C', 'CH-6A561423', 'BA-898BDED3',
       'EA-77ECA680', 'PO-38790FAE', 'BU-251A0E5A', 'EH-91356030',
       'AB-917B715E', 'SU-0B4C9A5F', 'IN-157E1191', 'GA-DA8EEA4A',
       'LA-2B59A825', 'WE-31C8B335', 'LA-F1042C48', 'EA-24B31935',
       'EA-7965E06D', 'PE-040B15C3', 'EH-DB8676C1', 'AL-91D3A03C',
       'HE-E39F4BC6', 'PO-47A01287', 'LA-D78C5F3F', 'ME-31958763',
       'AR-5E72668B', 'CO-CB3D8C89', 'CL-5C7C3198', 'BO-17E7B6CE',
       'CH-619E036C', 'BR-BC499EDD', 'NE-E50207AD', 'BE-8C0CF738',
       'NE-374D3983', 'SU-95D20AE9', 'ME-FB62E459', 'KA-653E783F',
       'MA-F0E23355', 'OS-70B2CD28', 'KI-A53AF10A', 'LE-63F3D33B',
       'VE-93DA8430', 'HI-BAD4DD1C', 'RU-1994A94D', 'LE-84C48

In [27]:
# Show every row that has a missing value in at least one column.
all_store_data.loc[all_store_data.isna().any(axis="columns")]

Unnamed: 0_level_0,address,longitude,locality,store_code,staff_numbers,opening_date,store_type,latitude,country_code,continent
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,,,,WEB-1388012W,325,2010-06-12,Web Portal,,GB,Europe
