In [6]:
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs # To extract the query parameters in Milestone 3
from datetime import datetime
import os
import json
import pandas as pd

#You should not change these lines.
#They define the parameters required to run the server.
hostName = "localhost"  # Host name the HTTP server is bound to
serverPort = 8080  # TCP port the HTTP server listens on


#MILESTONE 1
#endpoint => http://localhost:8080/test_connectivity_M1

#You just need to implement a connectivity test that returns the following JSON
#{"results": [{"Connectivity": "OK"}]}

#JSON TO DISPLAY IN CLIENT:
#{"results": [{"Connectivity": "OK"}]}

def test_connectivity_M1(text):
    results = {"results": [{"Connectivity": "OK"}]}
    api_answer = json.dumps(results, ensure_ascii=False)
    output_code = 200
    return api_answer, output_code

#MILESTONE 2
#endpoint => http://localhost:8080/count_countries_M2
#Country Count. Provide the number of entries per country 
#in the dataset in a sorted way. First the most frequent country.
#TEXT TO DISPLAY IN CLIENT:
# 1: TR(317)
# 2: DE(317)
# 3: NO(315)
# 4: BR(312)
# 5: IR(311)
# 6: CA(297)
# 7: FR(297)
# 8: CH(296)
# 9: IE(293)
# 10: DK(290)
# 11: NZ(288)
# 12: AU(285)
# 13: ES(281)
# 14: GB(278)
# 15: NL(276)
# 16: FI(276)
# 17: US(271)

def country_count_M2(text):
    """Count the users of every nationality found in the dataset.

    The request path in ``text`` is not inspected. Users are read from
    ``json_rand_users_5000.json`` (key ``results``); entries whose ``nat``
    field is missing or empty are skipped.

    Returns:
        A ``(body, status)`` tuple. ``body`` is JSON text of the form
        ``{"results": [{"Country": ..., "User Count": ...}, ...]}`` ordered
        from the most to the least frequent country; ``status`` is 200.
    """
    with open("json_rand_users_5000.json", "r") as users_file:
        users = json.load(users_file)["results"]

    # Tally one hit per user under its nationality code.
    tally = {}
    for nat in (entry.get("nat") for entry in users):
        if not nat:
            continue
        if nat in tally:
            tally[nat] += 1
        else:
            tally[nat] = 1

    # Stable sort: countries with equal counts keep first-seen order.
    ranking = list(tally.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)

    rows = []
    for nat, total in ranking:
        rows.append({"Country": nat, "User Count": total})

    return json.dumps({"results": rows}, ensure_ascii=False), 200
    
#MILESTONE 3    
#endpoint=>http://localhost:8080/count_countries_M3?countries=DK,ES,GB,IE,IR,NO,NL,NZ

#It is the same as before, but in this case it returns the results only for the provided countries.
#The list of available countries in the randomuser API (field nat) is:
#AU, BR, CA, CH, DE, DK, ES, FI, FR, GB, IE, IR, NO, NL, NZ, TR, US
#If no countries parameter is provided, you have to provide the results for all the countries
#NOTE: NO NEED TO IMPLEMENT CONTROL OF ERRORS

#TEXT TO DISPLAY IN CLIENT THAT ONLY CONSIDERS THE COUNTRIES IN THE QUERY:
# 1: NO(315)
# 2: IR(311)
# 3: IE(293)
# 4: DK(290)
# 5: NZ(288)
# 6: ES(281)
# 7: GB(278)
# 8: NL(276)

def country_count_M3(text):
    """Count users per nationality, optionally restricted to chosen countries.

    Args:
        text: Request path including the query string, e.g.
            ``/count_countries_M3?countries=DK,ES``. ``countries`` is a
            comma-separated list of ``nat`` codes; spaces around a code are
            ignored. When the parameter is absent every country is counted.

    Returns:
        A ``(body, status)`` tuple. ``body`` is JSON text of the form
        ``{"results": [{"Country": ..., "User Count": ...}, ...]}`` ordered
        from the most to the least frequent country; ``status`` is 200.
    """
    query_parameters = parse_qs(urlparse(text).query)
    countries_param = query_parameters.get("countries", [])

    # None means "no filter": without a countries parameter all countries
    # must be reported, not an empty result.
    selected_countries = None
    if countries_param:
        selected_countries = {
            code.strip() for code in countries_param[0].split(",") if code.strip()
        }

    # To access the data from the JSON file
    with open("json_rand_users_5000.json", "r") as json_file:
        data = json.load(json_file)["results"]

    # Count the number of users per (selected) country
    country_counters = {}
    for user in data:
        country = user.get("nat")
        if not country:
            continue
        if selected_countries is not None and country not in selected_countries:
            continue
        country_counters[country] = country_counters.get(country, 0) + 1

    # Most frequent country first; ties keep first-seen order (stable sort).
    sorted_countries = sorted(country_counters.items(), key=lambda x: x[1], reverse=True)

    results = {"results": [{"Country": country, "User Count": count} for country, count in sorted_countries]}

    api_answer = json.dumps(results, ensure_ascii=False)
    output_code = 200

    return api_answer, output_code

#MILESTONE 4    
#endpoint=>http://localhost:8080/age_groups_M4 
#You need to deliver the number of users in the data set
#within the following age groups: <18, 18-29, 30-49, 50-64, >64.

#ANSWER IN CLIENT
# Age <18: 0
# Age 18-29: 634
# Age 30-49: 1762
# Age 50-64: 1452
# Age >64: 1152

def age_groups_M4(text):
    """Count the users that fall in each age group.

    The groups are ``<18``, ``18-29``, ``30-49``, ``50-64`` and ``>64``.
    The request path in ``text`` is not inspected because this endpoint
    takes no parameters. Users are read from ``json_rand_users_5000.json``
    (key ``results``); a user without ``dob``/``age`` is counted with age 0.

    Returns:
        A ``(body, status)`` tuple. ``body`` is JSON text of the form
        ``{"results": [{"Age": "Age <18", "User Count": ...}, ...]}`` with
        the groups in ascending age order; ``status`` is 200.
    """
    # To access the data from the JSON file
    with open("json_rand_users_5000.json", "r") as json_file:
        data = json.load(json_file)["results"]

    # (label, exclusive upper bound) in ascending order. Exclusive bounds
    # leave no gap between groups, so a non-integer age such as 29.5 is
    # counted in "Age 18-29" instead of falling through to "Age >64".
    bounded_groups = (
        ("Age <18", 18),
        ("Age 18-29", 30),
        ("Age 30-49", 50),
        ("Age 50-64", 65),
    )
    oldest_group = "Age >64"

    # Insertion order of this dict is also the order of the response.
    age_counter = {label: 0 for label, _ in bounded_groups}
    age_counter[oldest_group] = 0

    for user in data:
        age = user.get("dob", {}).get("age", 0)
        for label, upper_bound in bounded_groups:
            if age < upper_bound:
                age_counter[label] += 1
                break
        else:
            # Not below any bound: the user is 65 or older.
            age_counter[oldest_group] += 1

    # Create the JSON response
    results = {"results": [{"Age": label, "User Count": count} for label, count in age_counter.items()]}

    api_answer = json.dumps(results, ensure_ascii=False)
    output_code = 200
    return api_answer, output_code


##MILESTONE 5

def users_M5(text):
    """Return the users that match the filters given in the query string.

    Args:
        text: Request path including the query string. Supported
            parameters, all optional and all comma-separated:

            * ``countries`` - ``nat`` codes (default: the 17 known codes).
            * ``gender`` - ``male`` and/or ``female`` (default: both).
            * ``age`` - integer ages, optionally wrapped in ``[...]`` and
              with or without a space after each comma (default: 18-99).
            * ``month`` - birth months as integers 1-12 (default: all).

    Returns:
        A ``(body, status)`` tuple. ``body`` is JSON text
        ``{"results": [<user>, ...]}`` with the matching user records in
        file order; ``status`` is 200.

    Raises:
        ValueError: If an ``age`` or ``month`` token is not an integer
            (this milestone does no error control).
    """
    query_parameters = parse_qs(urlparse(text).query)

    def requested(name, strip_brackets=False):
        """Return the non-empty tokens sent for *name*, or None if absent."""
        values = query_parameters.get(name)
        if not values:
            return None
        raw = values[0]
        if strip_brackets:
            raw = raw.replace("[", "").replace("]", "")
        # Split on the bare comma so both "30,31" and "30, 31" are accepted.
        return [token.strip() for token in raw.split(",") if token.strip()]

    # Store the countries we want to filter
    countries = requested("countries")
    if countries is None:
        countries = ["AU", "BR", "CA", "CH", "DE", "DK", "ES", "FI", "FR", "GB", "IE", "IR", "NO", "NL", "NZ", "TR", "US"]
    countries = set(countries)

    # Store the genders we want to filter
    genders = requested("gender")
    if genders is None:
        genders = ["male", "female"]
    genders = set(genders)

    # Ages and months are numbers in the JSON file, so compare as integers
    ages = requested("age", strip_brackets=True)
    ages = set(range(18, 100)) if ages is None else {int(token) for token in ages}

    months = requested("month")
    months = set(range(1, 13)) if months is None else {int(token) for token in months}

    # To access the data from the JSON file
    with open("json_rand_users_5000.json", "r") as json_file:
        data = json.load(json_file)["results"]

    filtered_data = []  # Users that meet every condition
    for user in data:
        # Parse the birth date so the month can be read directly
        date_dob = datetime.strptime(user['dob']['date'], '%Y-%m-%dT%H:%M:%S.%fZ')
        if (user['nat'] in countries
                and user['gender'] in genders
                and date_dob.month in months
                and user['dob']['age'] in ages):
            filtered_data.append(user)

    results = {"results": filtered_data}

    api_answer = json.dumps(results, ensure_ascii=False)
    output_code = 200

    return api_answer, output_code


##MILESTONE 6

def users_control_error_M6(text):
    """Filter users like Milestone 5, validating the query first.

    Args:
        text: Request path including the query string. Accepted fields,
            all optional and all comma-separated: ``countries``,
            ``gender``, ``age`` (optionally wrapped in ``[...]``) and
            ``month``.

    Returns:
        A ``(body, status)`` tuple.

        * If any value or field name is invalid, ``status`` is 400 and
          ``body`` is JSON text ``{"results": {<error name>: <message>}}``
          holding only the error categories that occurred ("Country
          Error", "Gender Error", "Month Error", "Age Error",
          "Field Error").
        * Otherwise ``status`` is 200 and ``body`` is JSON text
          ``{"results": [<user>, ...]}`` with the matching users.
    """
    # Define the valid values for each parameter
    valid_countries = ["AU", "BR", "CA", "CH", "DE", "DK", "ES", "FI", "FR", "GB", "IE", "IR", "NO", "NL", "NZ", "TR", "US"]
    valid_genders = ["female", "male"]
    valid_months = range(1, 13)
    valid_ages = range(18, 100)
    valid_fields = {"countries", "gender", "age", "month"}

    # Extract the parameters
    query_parameters = parse_qs(urlparse(text).query)

    def parse_int_options(raw, valid_range):
        """Split *raw* on commas into (accepted ints, rejected tokens)."""
        accepted, rejected = [], []
        for token in raw.split(","):
            try:
                number = int(token)
            except ValueError:
                # Report a non-numeric value instead of crashing on it.
                rejected.append(token.strip())
                continue
            if number in valid_range:
                accepted.append(number)
            else:
                rejected.append(str(number))
        return accepted, rejected

    # Check for country errors
    countries_param = query_parameters.get("countries", [])
    countries_list = countries_param[0].split(",") if countries_param else valid_countries
    country_errors = [country for country in countries_list if country not in valid_countries]

    # Check for gender errors (must read the gender parameter, not countries)
    gender_param = query_parameters.get("gender", [])
    gender_list = gender_param[0].split(",") if gender_param else valid_genders
    gender_errors = [gender for gender in gender_list if gender not in valid_genders]

    # Check for month errors
    month_param = query_parameters.get("month", [])
    if month_param:
        month_list, month_errors = parse_int_options(month_param[0], valid_months)
    else:
        month_list, month_errors = list(valid_months), []

    # Check for age errors; the list may arrive as "[30, 31]" or "30,31"
    age_param = query_parameters.get("age", [])
    if age_param:
        age_text = age_param[0].replace('[', '').replace(']', '')
        age_list, age_errors = parse_int_options(age_text, valid_ages)
    else:
        age_list, age_errors = list(valid_ages), []

    # Check for field errors
    field_errors = [field for field in query_parameters if field not in valid_fields]

    # Only the categories that actually failed are included in the answer
    error_response = {}
    if country_errors:
        error_response["Country Error"] = "The following country IDs are not supported: " + ",".join(country_errors)
    if gender_errors:
        error_response["Gender Error"] = "The following gender options are not supported: " + ",".join(gender_errors)
    if month_errors:
        error_response["Month Error"] = "The following month options are not supported: " + ",".join(month_errors)
    if age_errors:
        error_response["Age Error"] = "The following age options are not supported: " + ",".join(age_errors)
    if field_errors:
        error_response["Field Error"] = "The following fields are not valid: " + ",".join(field_errors)

    if error_response:
        api_answer = json.dumps({"results": error_response}, ensure_ascii=False)
        return api_answer, 400

    # Correct query: apply the filters to the dataset
    with open("json_rand_users_5000.json", "r") as json_file:
        data = json.load(json_file)["results"]

    countries = set(countries_list)
    genders = set(gender_list)
    months = set(month_list)
    ages = set(age_list)

    filtered_data = []
    for user in data:
        date_dob = datetime.strptime(user['dob']['date'], '%Y-%m-%dT%H:%M:%S.%fZ')
        if (user['nat'] in countries
                and user['gender'] in genders
                and date_dob.month in months
                and user['dob']['age'] in ages):
            filtered_data.append(user)

    api_answer = json.dumps({"results": filtered_data}, ensure_ascii=False)
    output_code = 200

    return api_answer, output_code
    

##MILESTONE 7

def access_token_M7(text):
    """Filter users like Milestone 5, but only for an authorised caller.

    Args:
        text: Request path including the query string. ``user_ID`` and
            ``access_token`` are mandatory and are checked against
            ``file_access_token_M7.csv`` (``;``-separated, columns
            ``User_ID`` and ``Access_Token``). The optional filters
            ``countries``, ``gender``, ``age`` and ``month`` are
            comma-separated.

    Returns:
        A ``(body, status)`` tuple.

        * Missing credentials, an unknown ``user_ID`` or a wrong token give
          ``status`` 400 and JSON text ``{"results": [{<error>: <message>}]}``.
        * Otherwise ``status`` is 200 and ``body`` is JSON text
          ``{"results": [<user>, ...]}`` with the matching users.

    Raises:
        ValueError: If an ``age`` or ``month`` token is not an integer.
    """
    # NOTE(review): parse_qs decodes "+" as a space, so a token containing
    # "+" only matches if the client percent-encodes it.
    query_parameters = parse_qs(urlparse(text).query)

    user_id = query_parameters.get("user_ID", [None])[0]
    access_token = query_parameters.get("access_token", [None])[0]

    def error_answer(errors):
        """Build the 400 answer; *errors* is the list placed under "results"."""
        return json.dumps({"results": errors}, ensure_ascii=False), 400

    # Check if the mandatory fields are included
    missing_fields = []
    if not user_id:
        missing_fields.append({"user_ID Error": "user_ID field not included in the parameters."})
    if not access_token:
        missing_fields.append({"Access_Token Error": "Access_Token field not included in the parameters."})
    if missing_fields:
        return error_answer(missing_fields)

    df = pd.read_csv('file_access_token_M7.csv', sep=';')

    # A non-numeric user_ID cannot be in the file: answer "not found"
    # rather than crashing on int().
    try:
        numeric_id = int(user_id)
    except ValueError:
        numeric_id = None

    # Check if the user ID exists in the file
    if numeric_id is None or numeric_id not in df['User_ID'].values:
        shown_id = user_id if numeric_id is None else numeric_id
        return error_answer([{"user_ID Error": f"user_ID {shown_id} not found in the database."}])

    # Check if the access token is ok for user ID
    match = df[(df['User_ID'] == numeric_id) & (df['Access_Token'] == access_token)]
    if match.empty:
        return error_answer([{"Access_Token Error": f"Access token is incorrect for user ID {numeric_id}"}])

    # Correct query (same filtering as Milestone 5)
    def requested(name, strip_brackets=False):
        """Return the non-empty tokens sent for *name*, or None if absent."""
        values = query_parameters.get(name)
        if not values:
            return None
        raw = values[0]
        if strip_brackets:
            raw = raw.replace("[", "").replace("]", "")
        # Split on the bare comma so both "30,31" and "30, 31" are accepted.
        return [token.strip() for token in raw.split(",") if token.strip()]

    countries = requested("countries")
    if countries is None:
        countries = ["AU", "BR", "CA", "CH", "DE", "DK", "ES", "FI", "FR", "GB", "IE", "IR", "NO", "NL", "NZ", "TR", "US"]
    countries = set(countries)

    genders = requested("gender")
    if genders is None:
        genders = ["male", "female"]
    genders = set(genders)

    # Ages and months are numbers in the JSON file, so compare as integers
    ages = requested("age", strip_brackets=True)
    ages = set(range(18, 100)) if ages is None else {int(token) for token in ages}

    months = requested("month")
    months = set(range(1, 13)) if months is None else {int(token) for token in months}

    # To access the data from the JSON file
    with open("json_rand_users_5000.json", "r") as json_file:
        data = json.load(json_file)["results"]

    filtered_data = []  # Users that meet every condition
    for user in data:
        # Parse the birth date so the month can be read directly
        date_dob = datetime.strptime(user['dob']['date'], '%Y-%m-%dT%H:%M:%S.%fZ')
        if (user['nat'] in countries
                and user['gender'] in genders
                and date_dob.month in months
                and user['dob']['age'] in ages):
            filtered_data.append(user)

    api_answer = json.dumps({"results": filtered_data}, ensure_ascii=False)
    output_code = 200

    return api_answer, output_code


class MyServer(BaseHTTPRequestHandler):
    """HTTP handler that routes GET requests to the milestone functions."""

    def _answer_for(self, end_point):
        """Return the ``(body, status)`` pair for the requested path.

        The path is matched by prefix against the known endpoints, in the
        order listed below; anything else yields a 400 "Endpoint Error".
        """
        if end_point.startswith("/test_connectivity_M1"):
            return test_connectivity_M1(end_point)
        if end_point.startswith("/count_countries_M2"):
            return country_count_M2(end_point)
        if end_point.startswith("/count_countries_M3"):
            return country_count_M3(end_point)
        if end_point.startswith("/age_groups_M4"):
            return age_groups_M4(end_point)
        if end_point.startswith("/users_M5"):
            return users_M5(end_point)
        if end_point.startswith("/users_control_error_M6"):
            return users_control_error_M6(end_point)
        if end_point.startswith("/access_token_M7"):
            return access_token_M7(end_point)

        # No known endpoint matched the request path.
        unknown = {"results": [{"Endpoint Error": "The provided endpoint does not exist."}]}
        return json.dumps(unknown, ensure_ascii=False), 400

    def do_GET(self):
        """Handle a GET request: status line, headers, then the JSON body."""
        api_answer, output_code = self._answer_for(self.path)

        self.send_response(output_code)
        self.send_header("Content-type", "text/json")
        self.end_headers()
        self.wfile.write(api_answer.encode("utf-8"))
        
if __name__ == "__main__":
    # Create the server on the configured host/port with MyServer as handler.
    webServer = HTTPServer((hostName, serverPort), MyServer)
    print("Server started http://%s:%s" % (hostName, serverPort))

    try:
        # Blocks here handling requests until the process is interrupted.
        webServer.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C / kernel interrupt is the expected way to stop the server.
        pass
    finally:
        # Always release the listening socket, even when serve_forever()
        # fails with an unexpected error; otherwise the port stays bound
        # for as long as the interpreter (e.g. a notebook kernel) lives.
        webServer.server_close()
        print("Server stopped.")



Server started http://localhost:8080
eeJiqB3RnUCuAI9061ntzg==countries=FR,GB,IE,IR,NO,NL,NZ,TR,US
<class 'str'>
Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []
The match is: Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []


127.0.0.1 - - [06/Nov/2023 16:11:06] "GET /access_token_M7?&user_ID=1231&access_token=eeJiqB3RnUCuAI9061ntzg==countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 400 -


eeJiqB3RnUCuAI9061ntzg==
<class 'str'>
Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []
The match is:    User_ID              Access_Token
0     1231  eeJiqB3RnUCuAI9061ntzg==


127.0.0.1 - - [06/Nov/2023 16:12:00] "GET /access_token_M7?&user_ID=1231&access_token=eeJiqB3RnUCuAI9061ntzg==&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 200 -
127.0.0.1 - - [06/Nov/2023 16:13:05] "GET /access_token_M7?&access_token=eeJiqB3RnUCuAI9061ntzg==&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 400 -
127.0.0.1 - - [06/Nov/2023 16:13:33] "GET /access_token_M7?&user_ID=1233&access_token=eeJiqB3RnUCuAI9061ntzg==&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 400 -


eeJiqB3RnUCuAI9061ntzg==
<class 'str'>
Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []
The match is:    User_ID              Access_Token
0     1231  eeJiqB3RnUCuAI9061ntzg==


127.0.0.1 - - [06/Nov/2023 16:13:38] "GET /access_token_M7?&user_ID=1231&access_token=eeJiqB3RnUCuAI9061ntzg==&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 200 -


eeJiqB3RnUCuAI9061ntzg=
<class 'str'>
Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []
The match is: Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []


127.0.0.1 - - [06/Nov/2023 16:14:00] "GET /access_token_M7?&user_ID=1231&access_token=eeJiqB3RnUCuAI9061ntzg=&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 400 -


eeJiqB3RnUCuAI9061ntzg==
<class 'str'>
Empty DataFrame
Columns: [User_ID, Access_Token]
Index: []
The match is:    User_ID              Access_Token
0     1231  eeJiqB3RnUCuAI9061ntzg==


127.0.0.1 - - [06/Nov/2023 16:14:07] "GET /access_token_M7?&user_ID=1231&access_token=eeJiqB3RnUCuAI9061ntzg==&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=%5B30,%2031,%2032,%2033,%2034,%2035,%2036,%2037,%2038,%2039,%2040,%2041,%2042,%2043,%2044,%2045%5D HTTP/1.1" 200 -


Server stopped.
