# In [1]:  (Jupyter notebook cell marker left over from the export)
from http.server import BaseHTTPRequestHandler, HTTPServer
import os
import json
from collections import Counter  # add
from urllib.parse import urlparse, parse_qs
import csv

#You should not change these lines.
#They define the parameters required to run the server.
hostName = "localhost"
serverPort = 8080


#MILESTONE 1
#endpoint => http://localhost:8080/test_connectivity_M1

#You just need to test the connectivity that returns the following JSON
#{"results": [{"Connectivity": "OK"}]}

#JSON TO DISPLAY IN CLIENT:
#{"results": [{"Connectivity": "OK"}]}

def test_connectivity_M1(text):
    """
    Milestone 1:
    This endpoint is used only to test basic connectivity.
    The server must return a fixed JSON confirming the connection.
    Expected output:
    {"results": [{"Connectivity": "OK"}]}
    """
    results = {"results": [{"Connectivity": "OK"}]}
    api_answer = json.dumps(results, ensure_ascii=False)#json.dumps transforms object into string, ensure_ascii
    # ensure_ascii=False → keeps non-ASCII characters readable (no \u escaping).
    output_code = 200
    return api_answer, output_code

#MILESTONE 2
#endpoint => http://localhost:8080/count_countries_M2
#Country Count. Provide the number of entries per country 
#in the dataset in a sorted way. First the most frequent country.
#TEXT TO DISPLAY IN CLIENT:
# 1: TR(317)
# 2: DE(317)
# 3: NO(315)
# 4: BR(312)
# 5: IR(311)
# 6: CA(297)
# 7: FR(297)
# 8: CH(296)
# 9: IE(293)
# 10: DK(290)
# 11: NZ(288)
# 12: AU(285)
# 13: ES(281)
# 14: GB(278)
# 15: NL(276)
# 16: FI(276)
# 17: US(271)

# Load the dataset once, when the module is imported, so the endpoint
# functions below can read USERS_DB without touching the file again.
# The path is relative, so it is resolved against the current working directory.
with open("json_rand_users_5000.json", "r", encoding="utf-8") as f:
    data = json.load(f) # parse the whole JSON document (indexed by key below, so expected to be a dict)
    USERS_DB = data["results"] # value stored under "results"; the endpoints iterate it as a sequence of user dicts

    
def country_count_M2(text):
    """
    Milestone 2: number of users per nationality.

    Tallies the 'nat' field of every entry in USERS_DB and orders the
    tallies by:
        1) descending count
        2) ascending country code when two counts are equal

    The request text is not inspected.

    Response shape:
    {
        "results": [
            {"country": "<code>", "count": <n>},
            ...
        ]
    }

    Returns a tuple (JSON string, HTTP status code), the code being 200.
    """
    # One pass over the dataset, one counter slot per nationality code.
    tally = Counter()
    for entry in USERS_DB:
        tally[entry["nat"]] += 1

    def ranking_key(pair):
        # Negating the count makes the ascending sort put the most
        # frequent country first; the code itself breaks ties.
        code, total = pair
        return (-total, code)

    rows = []
    for code, total in sorted(tally.items(), key=ranking_key):
        rows.append({"country": code, "count": total})

    return json.dumps({"results": rows}, ensure_ascii=False), 200
    
#MILESTONE 3    
#endpoint=>http://localhost:8080/count_countries_M3?countries=DK,ES,GB,IE,IR,NO,NL,NZ

#It is the same as before, but in this case it only returns the results for the provided countries.
#The list of available countries in the randomuser API (field nat) is:
#AU, BR, CA, CH, DE, DK, ES, FI, FR, GB, IE, IR, NO, NL, NZ, TR, US
#If no countries parameter is provided, you have to provide the results for all the countries.
#NOTE: NO NEED TO IMPLEMENT CONTROL OF ERRORS

#TEXT TO DISPLAY IN CLIENT THAT ONLY CONSIDERS THE COUNTRIES IN THE QUERY:
# 1: NO(315)
# 2: IR(311)
# 3: IE(293)
# 4: DK(290)
# 5: NZ(288)
# 6: ES(281)
# 7: GB(278)
# 8: NL(276)

def country_count_M3(text):
    """
    Milestone 3: per-country user counts, optionally restricted.

    'text' is the request path including the query string. When it
    carries a 'countries' parameter (comma-separated 'nat' codes), only
    those countries are reported, e.g.:
        /count_countries_M3?countries=DK,ES,GB,IE,IR,NO,NL,NZ
    Without the parameter every country is reported, as in M2.

    Ordering is descending count, then ascending country code. Requested
    codes with no users in the dataset are left out of the answer.

    Returns a tuple (JSON string, HTTP status code), the code being 200.
    """
    # /count_countries_M3                    -> every country
    # /count_countries_M3?countries=DK,ES,GB -> only DK, ES and GB
    params = parse_qs(urlparse(text).query)

    # Only the first 'countries' value is used; blanks around the codes
    # and empty items ("DK,,ES") are discarded.
    wanted = None
    if "countries" in params:
        wanted = []
        for piece in params["countries"][0].split(","):
            piece = piece.strip()
            if piece:
                wanted.append(piece)

    tally = Counter(entry["nat"] for entry in USERS_DB)

    if wanted is None:
        selected = tally
    else:
        # .get(..., 0) so an unknown code yields 0 instead of a KeyError.
        selected = {code: tally.get(code, 0) for code in wanted}

    ordered = sorted(selected.items(), key=lambda pair: (-pair[1], pair[0]))

    rows = []
    for code, total in ordered:
        if total > 0:
            rows.append({"country": code, "count": total})

    return json.dumps({"results": rows}, ensure_ascii=False), 200


#MILESTONE 4    
#endpoint=>http://localhost:8080/age_groups_M4 
#You need to deliver the number of users in the data set
#within the following age groups: <18, 18-29, 30-49, 50-64, >64.

#ANSWER IN CLIENT
# Age <18: 0
# Age 18-29: 634
# Age 30-49: 1762
# Age 50-64: 1452
# Age >64: 1152

def age_groups_M4(text):
    """
    Milestone 4: number of users per age group.

    Every entry of USERS_DB is classified by user["dob"]["age"] into one
    of the groups <18, 18-29, 30-49, 50-64, >64. Entries without a
    'dob' or without an 'age' are skipped. The request text is not
    inspected.

    Response shape (a single object inside "results"):
        {"results": [{"age_<18": n, "age_18_29": n, "age_30_49": n,
                      "age_50_64": n, "age_>64": n}]}

    Returns a tuple (JSON string, HTTP status code), the code being 200.
    """
    groups = {
        "<18": 0,
        "18-29": 0,
        "30-49": 0,
        "50-64": 0,
        ">64": 0,
    }

    for user in USERS_DB:
        # .get() at both levels: a missing 'dob' or 'age' gives None
        # instead of raising.
        age = user.get("dob", {}).get("age")
        if age is None:
            continue

        # Cumulative upper bounds: each branch is reached only when all
        # smaller bounds already failed, so the groups have no gaps.
        # With closed integer ranges (18 <= age <= 29, 30 <= age <= 49,
        # ...) a fractional age such as 29.5 matched none of them and
        # was counted as ">64".
        if age < 18:
            groups["<18"] += 1
        elif age < 30:
            groups["18-29"] += 1
        elif age < 50:
            groups["30-49"] += 1
        elif age < 65:
            groups["50-64"] += 1
        else:
            groups[">64"] += 1

    results = {
        "results": [{
            "age_<18": groups["<18"],
            "age_18_29": groups["18-29"],
            "age_30_49": groups["30-49"],
            "age_50_64": groups["50-64"],
            "age_>64": groups[">64"],
        }]
    }

    api_answer = json.dumps(results, ensure_ascii=False)
    output_code = 200
    return api_answer, output_code


#MILESTONE 5
#endpoint => http://localhost:8080/users_M5?countries=FR,GB,IE,IR,NO,NL,NZ,TR,US&gender=female&month=01,02,03,04,05&age=30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45
#You need to find the users that meet the list of parameters in the query.
#The query should work with 1, 2, 3 or 4 of the fields combined in whatever order.

#countries=AU,BR,CA,CH,DE,DK,ES,FI,FR,GB,IE,IR,NO,NL,NZ,TR,US  [use the nat field for this] (if not defined consider all countries)
#gender=male,female (if not used consider all genders)
#month= 01,02,03,04,05,06,07,08,09,10,11,12 (if not used consider all the months)
#age:Any value between 18 and 99 (if not used consider all ages)

#NOTE1: IT IS NOT NEEDED TO DO ERROR CONTROL.
#NOTE2: You need to provide the total number of users and the first 100 users in the JSON as follows:
#NOTE3: Sometimes the query selection may obtain fewer than 100 users; then print all of them.

#TEXT TO DISPLAY IN CLIENT:

# NUM. USERS:165
# User 1:
# 	-Name: Linne Hoogkamer
# 	-Gender: female
# 	-Age: 40
# 	-Country: Netherlands
# 	-Postcode: 42173
# 	-Email: linne.hoogkamer@example.com
# User 2:
# 	-Name: Enola Martin
# 	-Gender: female
# 	-Age: 43
# 	-Country: France
# 	-Postcode: 93894
# 	-Email: enola.martin@example.com
# User 3:
# 	-Name: Mathilda Skjevik
# 	-Gender: female
# 	-Age: 42
# 	-Country: Norway
# 	-Postcode: 6630
# 	-Email: mathilda.skjevik@example.com
# User 4:
# 	-Name: Valentine Thomas
# 	-Gender: female
# 	-Age: 35
# 	-Country: France
# 	-Postcode: 84452
# 	-Email: valentine.thomas@example.com
#.....
#.....
#.....
#User 100:
# 	-Name: Cecilia Asphaug
# 	-Gender: female
# 	-Age: 42
# 	-Country: Norway
# 	-Postcode: 5052
# 	-Email: cecilia.asphaug@example.com

def users_M5(text):
    """
    Milestone 5: list the users that satisfy the query filters.

    'text' is the request path including the query string. Each of the
    following parameters is optional and holds comma-separated values:
        - countries: 'nat' codes (AU, BR, CA, ...)
        - gender:    genders (male, female)
        - month:     two-character birth months ("01".."12"), compared
                     with characters 5-6 of dob["date"]
        - age:       ages, compared with dob["age"]

    A parameter that is absent (or holds no usable value) does not
    restrict the result. Age values that are not integers are ignored.

    Every matching user is returned, in dataset order, as a dict with
    the keys name_first, name_last, gender, age, country, postcode and
    email; trimming to the first 100 is left to the client.

    Returns a tuple (JSON string, HTTP status code), the code being 200.
    """
    # e.g. "/users_M5?countries=FR,GB&age=30,31"
    #   -> {"countries": ["FR,GB"], "age": ["30,31"]}
    params = parse_qs(urlparse(text).query)

    def csv_values(field):
        # First value of the parameter split on commas, blanks trimmed
        # and empty items dropped; None when nothing usable is left.
        if field not in params:
            return None
        pieces = []
        for piece in params[field][0].split(","):
            piece = piece.strip()
            if piece:
                pieces.append(piece)
        return pieces or None

    wanted_countries = csv_values("countries")
    wanted_genders = csv_values("gender")
    wanted_months = csv_values("month")
    wanted_ages = csv_values("age")

    # Ages are compared as integers; anything int() rejects is skipped
    # because this milestone does no error reporting.
    if wanted_ages is not None:
        numeric_ages = []
        for raw_age in wanted_ages:
            try:
                numeric_ages.append(int(raw_age))
            except ValueError:
                pass
        wanted_ages = numeric_ages or None

    selected = []

    for person in USERS_DB:
        nat = person.get("nat")
        gender = person.get("gender")
        birth = person.get("dob", {})
        age = birth.get("age")
        birth_date = birth.get("date", "")  # e.g. "1977-01-12T..."
        birth_month = None if len(birth_date) < 7 else birth_date[5:7]

        # A user is kept only when every active filter accepts it.
        keep = (
            (wanted_countries is None or nat in wanted_countries)
            and (wanted_genders is None or gender in wanted_genders)
            and (wanted_months is None or birth_month in wanted_months)
            and (wanted_ages is None
                 or (age is not None and age in wanted_ages))
        )
        if not keep:
            continue

        place = person.get("location", {})
        postcode = place.get("postcode")
        full_name = person.get("name", {})

        selected.append({
            "name_first": full_name.get("first", ""),
            "name_last": full_name.get("last", ""),
            "gender": gender,
            "age": age,
            "country": place.get("country"),
            # The postcode is always emitted as a string ("" when absent).
            "postcode": "" if postcode is None else str(postcode),
            "email": person.get("email", ""),
        })

    return json.dumps({"results": selected}, ensure_ascii=False), 200


#MILESTONE 6
#We have to repeat the same exercise as in M5, but this time we need to implement error control.
#FORMAT CONTROL:
# Countries: If a country is not in the available list, generate an error and inform the user which country(s) is(are) wrong with a message.
# Gender: If a gender is not in the list of genders, generate an error and inform the user which gender(s) is(are) wrong with a message.
# Month: If the month is not a valid value (e.g., 15), generate an error and inform the user which month(s) is(are) wrong with a message.
# Age: If the age is not in the range 18-99, it is a wrong value. Generate an error and inform the user which age(s) is(are) wrong with a message.
# Field: If after ? there is a field (e.g., extra_field) other than countries, gender, age and month, report the error.
# The output message should be a JSON document with one error per category, including all the wrong values employed.
#
#NOTES:
#1- Success of the process should report status code 200.
#2- Failure of the process due to an error should report status 400 (Bad Request).
#3- You need to include all the errors, so you should not stop verifying the format when you find the first error.

#TEXT TO DISPLAY IN CLIENT (assuming every kind of error is present in the request):
 
#JSON output
#{"results": [{"Country Error": "The following country IDs are not supported: IT,SW,MX", 
#"Gender Error": "The following gender options are not supported: women,men", 
#"Month Error": "The following month options are not supported: 15,23", 
#"Age Error": "The following age options are not supported: 12,15,123", 
#"Field Error": "The following fields are not valid: city"}]}

#IF THERE IS AN ERROR ONLY IN 1 or 2 FIELDS THE JSON SHOULD ONLY INCLUDE THOSE ERRORS

def users_control_error_M6(text):
    """
    Milestone 6: the M5 user search with validation of the query.

    'text' is the request path including the query string. Before any
    filtering, the query is checked for:
        - country codes outside the supported list
        - genders other than "male" / "female"
        - months other than "01".."12"
        - ages that are not integers in the range 18..99
        - parameter names other than countries, gender, month, age

    All checks always run, so the answer lists every problem at once,
    one entry per category, each naming the offending values.

    Parameters with an empty value ("?city=") are kept by the parser so
    that an unsupported field cannot slip through by being blank, and
    every occurrence of a repeated parameter is validated.

    Returns a tuple (JSON string, HTTP status code):
        - any error: {"results": [{"<Category> Error": "<message>", ...}]}
          with status 400
        - no error: whatever users_M5(text) returns (status 200)
    """
    allowed_countries = {"AU", "BR", "CA", "CH", "DE", "DK", "ES", "FI",
                         "FR", "GB", "IE", "IR", "NO", "NL", "NZ", "TR", "US"}
    allowed_genders = {"male", "female"}
    allowed_months = {f"{i:02d}" for i in range(1, 13)}  # "01".."12"
    valid_fields = {"countries", "gender", "month", "age"}

    # keep_blank_values=True: by default parse_qs silently drops
    # parameters whose value is empty, which would hide "?city=" from
    # the field check below.
    query_params = parse_qs(urlparse(text).query, keep_blank_values=True)

    def is_valid_age(value):
        # Valid means: parses as an integer and lies within 18..99.
        try:
            return 18 <= int(value) <= 99
        except ValueError:
            return False

    def rejected_values(field, is_valid):
        # Comma-separated items of every occurrence of 'field' that fail
        # 'is_valid', in request order. Blank items are ignored.
        rejected = []
        for raw in query_params.get(field, []):
            for item in raw.split(","):
                item = item.strip()
                if item and not is_valid(item):
                    rejected.append(item)
        return rejected

    # (error key, query parameter, validator, message prefix); the order
    # here is the order of the keys in the error answer.
    checks = (
        ("Country Error", "countries", lambda c: c in allowed_countries,
         "The following country IDs are not supported: "),
        ("Gender Error", "gender", lambda g: g in allowed_genders,
         "The following gender options are not supported: "),
        ("Month Error", "month", lambda m: m in allowed_months,
         "The following month options are not supported: "),
        ("Age Error", "age", is_valid_age,
         "The following age options are not supported: "),
    )

    errors = {}
    for error_key, field, is_valid, prefix in checks:
        rejected = rejected_values(field, is_valid)
        if rejected:
            errors[error_key] = prefix + ",".join(rejected)

    # Unsupported parameter names, e.g. "&city=Madrid" -> ["city"].
    invalid_fields = [field for field in query_params if field not in valid_fields]
    if invalid_fields:
        errors["Field Error"] = "The following fields are not valid: " + ",".join(invalid_fields)

    if errors:
        return json.dumps({"results": [errors]}, ensure_ascii=False), 400

    # Query is clean: the actual filtering is the M5 logic.
    return users_M5(text)


#MILESTONE 7
#endpoint => http://localhost:8080/access_token_M7?access_token=$P$BZlw7PF1j.XDezr9sUj6moCjBlSx/e0&countries=FR,GB,IE,IR,NO,NL,NZ,TR,US
#&user_ID=9123&gender=male,female&month=01,05,06,12&age=30,31,32,33,34,35

#We have to repeat the same exercise as in M5, but this time we need to control whether the user ID and access token are correct.
#To this end we will use a query as follows, where the access token parameter appears as another field. But it is a compulsory field.
#So we basically need to verify that the access token is there and, if so, we call the M6 endpoint.
#The URL will look as follows:
#http://localhost:8080/access_token_M7?user_ID=127821&access_token=XXASJasnsna?.12123jk2amsass?&countries=FR,ES&age=18,19,20,21,22&gender=male
#You need to validate the user ID and access token against the list of valid access tokens.


#TEXT OPTIONS TO DISPLAY IN CLIENT FOR ERRORS (if the access token and user ID are correct, then deliver the answer associated to M6)

#1- No fields included (could be one of them or both of them) 
#{"results": [{"user_ID Error": "user_ID field not included in the parameters.", 
#"Access_Token Error": "Access_Token field not included in the parameters."}]}

#2- UserID does not exist
#{"results": [{"user_ID Error": "user_ID 2131 not found in the database."}]}

#3- User ID exists but access_token wrong
#{"results": [{"access_token Error": "access_token 7423hjmn23nsd.121 wrong for the userID 9123"}]}

# Load user_ID -> access_token mapping for Milestone 7
USER_TOKENS = {}  # key: user_ID (string, as read from the file), value: its access_token

# The file is ';'-separated with a header row naming the columns
# "User_ID" and "Access_Token".
# - newline="" is what the csv module documentation asks for, so the
#   reader itself handles line endings (including ones inside quoted fields).
# - "utf-8-sig" reads plain UTF-8 exactly like "utf-8" but also strips a
#   leading byte-order mark; with "utf-8" a BOM would become part of the
#   first header name and row["User_ID"] would raise KeyError.
with open("file_access_token_M7.csv", "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f, delimiter=';')  # each row is a dict keyed by the header names
    for row in reader:
        user_id = row["User_ID"]
        token = row["Access_Token"]
        USER_TOKENS[user_id] = token  # a repeated User_ID keeps its last token


        
def access_token_M7(text):
    """
    Milestone 7: credential check in front of the validated user search.

    'text' is the request path including the query string. The query
    must carry both 'user_ID' and 'access_token':
        - user_ID must be a key of USER_TOKENS (loaded from
          file_access_token_M7.csv)
        - access_token must equal the token stored for that user_ID

    Error answers (status 400), in the order they are checked:
        1) user_ID and/or access_token missing: one entry per missing field
        2) user_ID unknown
        3) access_token does not match the stored one

    When the credentials are correct the request is handed to
    users_control_error_M6, as the milestone statement asks ("deliver
    answer associated to M6"), so the filters are validated as well.
    The two credential fields are removed from the forwarded query
    because M6 reports any field other than countries, gender, month
    and age as invalid.

    Returns a tuple (JSON string, HTTP status code).
    """
    # Imported here because the module-level import only brings in
    # urlparse and parse_qs.
    from urllib.parse import parse_qsl, urlencode

    def error_response(errors):
        # Every failure uses the same envelope and status code.
        return json.dumps({"results": [errors]}, ensure_ascii=False), 400

    parsed_url = urlparse(text)
    query_params = parse_qs(parsed_url.query)

    user_id_list = query_params.get("user_ID")
    access_token_list = query_params.get("access_token")

    # --- Case 1: missing fields (one or both are reported together) ---
    missing = {}
    if user_id_list is None:
        missing["user_ID Error"] = "user_ID field not included in the parameters."
    if access_token_list is None:
        missing["Access_Token Error"] = "Access_Token field not included in the parameters."
    if missing:
        return error_response(missing)

    user_id = user_id_list[0]            # e.g. "9123"
    access_token = access_token_list[0]  # e.g. "$P$BZlw7..."

    # --- Case 2: user_ID does not exist ---
    if user_id not in USER_TOKENS:
        return error_response(
            {"user_ID Error": f"user_ID {user_id} not found in the database."}
        )

    # --- Case 3: user_ID exists but the token is not the stored one ---
    if access_token != USER_TOKENS[user_id]:
        return error_response(
            {"access_token Error": f"access_token {access_token} wrong for the userID {user_id}"}
        )

    # --- Credentials OK: forward only the filter parameters to M6 ---
    credential_fields = {"user_ID", "access_token"}
    filter_pairs = [
        (key, value)
        for key, value in parse_qsl(parsed_url.query, keep_blank_values=True)
        if key not in credential_fields
    ]
    forwarded = parsed_url.path
    if filter_pairs:
        # urlencode re-escapes the values; M6 decodes them again when it
        # parses the query.
        forwarded += "?" + urlencode(filter_pairs)

    return users_control_error_M6(forwarded)



class MyServer(BaseHTTPRequestHandler):
    """HTTP request handler that maps each milestone endpoint to its function."""

    def do_GET(self):
        """
        Answer a GET request.

        The path component of the request (query string excluded, a
        trailing "/" tolerated) selects the milestone function. The
        function receives the full request path, query string included,
        and returns the JSON body plus the HTTP status code to send.
        An unknown path is answered with an "Endpoint Error" JSON and
        status 400.
        """
        end_point = self.path  # e.g. "/users_M5?countries=FR,GB&gender=female"

        # Route on the exact path. A prefix test on the raw request line
        # would also accept "/users_M5xyz" or "/test_connectivity_M1_bogus".
        route = urlparse(end_point).path.rstrip("/")

        if route == "/test_connectivity_M1":
            api_answer, output_code = test_connectivity_M1(end_point)

        elif route == "/count_countries_M2":
            api_answer, output_code = country_count_M2(end_point)

        elif route == "/count_countries_M3":
            api_answer, output_code = country_count_M3(end_point)

        elif route == "/age_groups_M4":
            api_answer, output_code = age_groups_M4(end_point)

        elif route == "/users_M5":
            api_answer, output_code = users_M5(end_point)

        elif route == "/users_control_error_M6":
            api_answer, output_code = users_control_error_M6(end_point)

        elif route == "/access_token_M7":
            api_answer, output_code = access_token_M7(end_point)

        else:  # the "endpoint not found" error
            results = {
                "results": [
                    {"Endpoint Error": "The provided endpoint does not exist."}
                ]
            }
            api_answer = json.dumps(results, ensure_ascii=False)
            output_code = 400

        self.send_response(output_code)  # status line
        # NOTE(review): "application/json" is the registered media type;
        # "text/json" is kept so existing clients see the same header.
        self.send_header("Content-type", "text/json")
        self.end_headers()
        self.wfile.write(bytes(api_answer, "utf-8"))  # body: the JSON string as UTF-8 bytes
        
 # Starting the server
if __name__ == "__main__":
    # Bind the listening socket on (hostName, serverPort); MyServer is
    # the handler class used for the incoming requests.
    webServer = HTTPServer((hostName, serverPort), MyServer)
    print("Server started http://%s:%s" % (hostName, serverPort))

    try:
        webServer.serve_forever()  # blocks, handling requests until interrupted
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server, not an error.
        pass
    finally:
        # Close the socket on every exit path, including an unexpected
        # exception out of serve_forever(), which still propagates.
        webServer.server_close()
        print("Server stopped.")

# --- Captured notebook output of a sample run ---
# Server started http://localhost:8080
#
#
# 127.0.0.1 - - [08/Dec/2025 20:46:44] "GET /access_token_M7?user_ID=9123&access_token=7423hjmn23nsd.121 HTTP/1.1" 400 -
#
#
# Server stopped.
