# Reading Data

In [1]:
import os
import sys

file_path = 'world_happiness_index_2013_2023.csv'

if not os.path.isfile(file_path):  # check if the file is available or not
    print("File not found.")
    sys.exit()
data = [] # we will store our data in list 
with open(file_path, 'r') as file:   # open file in read mode
    for line in file: # iterate through each line
        row = line.strip().split(',')  # split the line at comma
        data.append(row) # append each row in the data list
# in above code we opened file in read mode and store it as lists within list. Each list represent a row

In [2]:
len(data)  # our data has 1671 records/rows

1671

In [3]:
data[:10]  # print first 10 reocrds

[['Country', 'Year', 'Index', 'Rank'],
 ['Afghanistan', '2013', '4.04', '143.0'],
 ['Singapore', '2013', '6.546', '30.0'],
 ['Comoros', '2013', '3.851', '149.0'],
 ['Sierra Leone', '2013', '4.318', '127.0'],
 ['Congo', '2013', '', ''],
 ['Serbia', '2013', '4.813', '106.0'],
 ['Congo Brazzaville', '2013', '4.297', '129.0'],
 ['Senegal', '2013', '3.959', '147.0'],
 ['Congo Kinshasa', '2013', '4.578', '117.0']]

# Task 1
- **Top 10 Happiest Countries**

In [4]:
# Function to separate country data by a specific year
def separate_countries_by_year(data, year):
    year_i = []
    country_i = []
    index_i = []
    rank_i = []
    for row in data:
        cntry, yr, ind, rnk = row
        
        # Check if the row matches the specified year and index is not empty
        if yr == year and ind != '':
            country_i.append(cntry)
            year_i.append(yr)
            index_i.append(float(ind))
            rank_i.append(rnk)
    
    return country_i, year_i, index_i, rank_i

# Function to perform selection sort based on index values
def selection_sort(index_i, country_i):
    # Create a list of tuples pairing elements from lists index_i and country_i
    paired_lists = list(zip(index_i, country_i))

    # Perform a selection sort on list index_i and synchronize the sorting on list country_i in descending order
    for i in range(len(index_i)):
        max_index = i
        for j in range(i + 1, len(index_i)):
            if paired_lists[j][0] > paired_lists[max_index][0]:
                max_index = j
        
        # Swap elements in the paired list
        paired_lists[i], paired_lists[max_index] = paired_lists[max_index], paired_lists[i]

    # Extract the sorted elements from list country_i
    sorted_countries = [pair[1] for pair in paired_lists]
    return list(zip(sorted_countries, sorted(index_i, reverse=True)))

# Function to get top countries by year
def get_top_countries_by_year(data, specific_year=None, top_count=5, top_from_bottom=False, print_all=True):
    year_list = ['2013','2015','2016','2017','2018','2019','2020','2021','2022','2023']
    
    # Process for a specific year if provided and it exists in the year list
    if specific_year:
        if specific_year in year_list:
            country_i, year_i, index_i, rank_i = separate_countries_by_year(data, specific_year)
            top_countries = selection_sort(index_i, country_i)
            if top_from_bottom:
                print(f"Top {top_count} least happiest countries in year {specific_year}\n", top_countries[-top_count:], '\n\n')
            else:
                print(f"Top {top_count} most happiest countries in year {specific_year}\n", top_countries[:top_count], '\n\n')
        else:
            print("Please select year that is in this list {year_list}")
    
    # Process for all years if print_all is True and no specific year is provided
    if print_all and not specific_year:
        for year in year_list: # to print data for each year
            country_i, year_i, index_i, rank_i = separate_countries_by_year(data, year)
            top_countries = selection_sort(index_i, country_i)
            if top_from_bottom:
                print(f"Top {top_count} least happiest countries in year {year}\n", top_countries[-top_count:], '\n\n')
            else:
                print(f"Top {top_count} most happiest countries in year {year}\n", top_countries[:top_count], '\n\n')


In [5]:

# separate the header row and data rows
header = data[0]
data_ = data[1:]
# set top_from_bottom=True if you want to print least happiest countries (by default it is false)
# set top_count as your requirement (by default top 5 countries)
# set specific_year as your requirement 
# print_all is by default True. But if you specify specific_year then it will be ignored whether you set print_all=False or not
top_countries_data = get_top_countries_by_year(data_, specific_year='2019', top_from_bottom=False, top_count=5, print_all=False)
top_countries_data

Top 5 most happiest countries in year 2019
 [('Finland', 7.769), ('Denmark', 7.6), ('Norway', 7.554), ('Iceland', 7.554), ('Netherlands', 7.488)] 




In [17]:
top_countries_data = get_top_countries_by_year(data_, top_from_bottom=True, top_count=3)
top_countries_data

Top 3 least happiest countries in year 2013
 [('Central African Republic', 3.623), ('Benin', 3.528), ('Togo', 2.936)] 


Top 3 least happiest countries in year 2015
 [('Syria', 3.006), ('Burundi', 2.905), ('Togo', 2.839)] 


Top 3 least happiest countries in year 2016
 [('Togo', 3.303), ('Syria', 3.069), ('Burundi', 2.905)] 


Top 3 least happiest countries in year 2017
 [('Tanzania', 3.349), ('Burundi', 2.905), ('Central African Republic', 2.693)] 


Top 3 least happiest countries in year 2018
 [('South Sudan', 3.254), ('Central African Republic', 3.083), ('Burundi', 2.905)] 


Top 3 least happiest countries in year 2019
 [('Afghanistan', 3.203), ('Central African Republic', 3.083), ('South Sudan', 2.853)] 


Top 3 least happiest countries in year 2020
 [('Zimbabwe', 3.299), ('South Sudan', 2.817), ('Afghanistan', 2.567)] 


Top 3 least happiest countries in year 2021
 [('Rwanda', 3.415), ('Zimbabwe', 3.145), ('Afghanistan', 2.523)] 


Top 3 least happiest countries in year 2022
 [('Z

# Task 2
- **The top 3 countries that have gotten 1st in the happiness index for all the data provided the most number of times**
- **The 3 unhappiest countries based on happiness index the most number of times.**

In [40]:
# Function to find top countries with the most first positions in the index across 10 years
def top_3_Countries_with_most_first_positions(data, top_count=5, top_from_bottom=False):
    year_list = ['2013','2015','2016','2017','2018','2019','2020','2021','2022','2023']
    result = []

    # Iterate through each year in the year_list
    for year in year_list:
        # Retrieve country, year, index, and rank data for each year
        country_i, year_i, index_i, rank_i = separate_countries_by_year(data, year)
        
        # Perform selection sort based on the index for the current year
        top_countries = selection_sort(index_i, country_i)
        
        # Append top countries to result based on top_from_bottom condition
        if top_from_bottom:
            result.append((year, top_countries[-1:]))
        else:
            result.append((year, top_countries[0:1]))


    top_in_10_years = []

    # Extract the top country in each year's top list and create a list of top countries over 10 years
    inde=0
    if top_from_bottom:
        inde = -1
    for first in result:
        top_in_10_years.append(first[1][inde][0])

    count_dict = {}
    # Count occurrences of each item in the list
    for item in top_in_10_years:
        if item in count_dict:
            count_dict[item] += 1
        else:
            count_dict[item] = 1

    # Create a list of tuples with unique items and their counts
    result_list = [(value, key) for key, value in count_dict.items()]
    print(result)
    print("top 3 Countries with most first positions")
    return sorted(result_list, reverse=True)[:3]


In [41]:
# select top_from_bottom=True if you want to print most unhappiest countries

top_countries_data = top_3_Countries_with_most_first_positions(data_, top_from_bottom=True)
top_countries_data

[('2013', [('Togo', 2.936)]), ('2015', [('Togo', 2.839)]), ('2016', [('Burundi', 2.905)]), ('2017', [('Central African Republic', 2.693)]), ('2018', [('Burundi', 2.905)]), ('2019', [('South Sudan', 2.853)]), ('2020', [('Afghanistan', 2.567)]), ('2021', [('Afghanistan', 2.523)]), ('2022', [('Afghanistan', 2.404)]), ('2023', [('Afghanistan', 1.859)])]
top 3 Countries with most first positions


[(4, 'Afghanistan'), (2, 'Togo'), (2, 'Burundi')]

# Task 3
- **Specific country with increasing its rank or decreasing its rank over specific period**

In [37]:
def find_country_rank(data, country, start_year, end_year):
    ranks_over_period = []
    
    # Convert start_year and end_year to integers
    start_year = int(start_year)
    end_year = int(end_year)
    
    # Iterate through the data for the specified country within the period
    for row in data:
        cntry, year, _, rank = row
        
        # Check if the row matches the specified country and falls within the specified period
        if cntry == country and start_year <= int(year) <= end_year and rank != '':
            ranks_over_period.append(int(float(rank)))
    
    if len(ranks_over_period) < 2:
        print(f"Not enough data available to analyze rank changes for {country} between {start_year} and {end_year}.")
    else:
        change = "decreasing" if ranks_over_period[-1] < ranks_over_period[0] else "increasing"
        print(f"The rank of {country} has been {change} over the period from {start_year} to {end_year}: {ranks_over_period[0]}->{ranks_over_period[-1]}")

# Example usage:
find_country_rank(data_, "India", 2021, 2023)  # Change country name and period as needed


The rank of India has been decreasing over the period from 2021 to 2023: 139->126


In [29]:
# # Function to find whether a country's rank has increased or decreased over a specific period
# def find_country_rank(data, country, period):
#     one_country = []

#     # Iterate through the data to collect rows pertaining to the specified country
#     for i, row in enumerate(data):
#         if row[0] == country:
#             one_country.append(row)
#     print(one_country)
#     # Check the change in rank over the specified period
#     if float(one_country[0][3]) > float(one_country[period - 1][3]):
#         print(f"{country} rank decreasing over period of {period} years")
#     else:
#         print(f"{country} rank increasing over period of {period} years")

# # Example usage:
# find_country_rank(data_, "Finland", 3)


[['Finland', '2013', '7.389', '7.0'], ['Finland', '2015', '7.406', '6.0'], ['Finland', '2016', '7.413', '5.0'], ['Finland', '2017', '7.469', '5.0'], ['Finland', '2018', '7.632', '1.0'], ['Finland', '2019', '7.769', '1.0'], ['Finland', '2020', '7.809', '1.0'], ['Finland', '2021', '7.842', '1.0'], ['Finland', '2022', '7.821', '1.0'], ['Finland', '2023', '7.804', '1.0']]
Finland rank decreasing over period of 3 years


# Task 4
- find list of countries

In [80]:
# function to find list of countries
def list_countries(data, dsc=False): # select dsc true if you want descending order
    countries = []  
    for row in data:
        countries.append(row[0])
    return sorted(list(set(countries)), reverse=dsc)
list_countries(data_, dsc=True)

['Zimbabwe',
 'Zambia',
 'Yemen',
 'Vietnam',
 'Venezuela',
 'Uzbekistan',
 'Uruguay',
 'United States',
 'United Kingdom',
 'United Arab Emirates',
 'Ukraine',
 'Uganda',
 'Turkmenistan',
 'Turkiye',
 'Tunisia',
 'Trinidad and Tobago',
 'Togo',
 'Thailand',
 'Tanzania',
 'Tajikistan',
 'Taiwan',
 'Syria',
 'Switzerland',
 'Sweden',
 'Swaziland',
 'Suriname',
 'Sudan',
 'Sri Lanka',
 'Spain',
 'South Sudan',
 'South Korea',
 'South Africa',
 'Somaliland',
 'Somalia',
 'Slovenia',
 'Slovakia',
 'Singapore',
 'Sierra Leone',
 'Serbia',
 'Senegal',
 'Saudi Arabia',
 'Rwanda',
 'Russia',
 'Romania',
 'Qatar',
 'Puerto Rico',
 'Portugal',
 'Poland',
 'Philippines',
 'Peru',
 'Paraguay',
 'Panama',
 'Palestine',
 'Pakistan',
 'Oman',
 'Norway',
 'North Macedonia',
 'North Cyprus',
 'Nigeria',
 'Niger',
 'Nicaragua',
 'New Zealand',
 'Netherlands',
 'Nepal',
 'Namibia',
 'Myanmar',
 'Mozambique',
 'Morocco',
 'Montenegro',
 'Mongolia',
 'Moldova',
 'Mexico',
 'Mauritius',
 'Mauritania',
 'Mal

# Task 5

- countries with or above specific index value

In [45]:
# function to find countries with or above specific index value
def countries_with_index_above(data, index_threshold):
    countries_above_index = []
    for row in data:
        if row[2] and float(row[2]) >= index_threshold:  # Ensure index is available and meets the threshold
            countries_above_index.append((row[0], float(row[2])))  # Store country and index as tuple

    # Sort countries in descending order based on their index
    sorted_countries = sorted(countries_above_index, key=lambda x: x[1], reverse=True)
    return sorted_countries

# Example usage:
index_threshold = 7.5  # Set the desired index threshold
result_countries = countries_with_index_above(data_, index_threshold)

for country, index in result_countries:
    print(f"{country}: {index}")


Finland: 7.842
Finland: 7.821
Finland: 7.809
Finland: 7.804
Finland: 7.769
Denmark: 7.693
Norway: 7.655
Switzerland: 7.65
Denmark: 7.646
Denmark: 7.636
Finland: 7.632
Denmark: 7.62
Denmark: 7.6
Norway: 7.594
Switzerland: 7.587
Denmark: 7.586
Switzerland: 7.571
Iceland: 7.561
Switzerland: 7.56
Iceland: 7.557
Denmark: 7.555
Norway: 7.554
Iceland: 7.554
Iceland: 7.554
Norway: 7.537
Iceland: 7.53
Denmark: 7.527
Denmark: 7.526
Norway: 7.522
Denmark: 7.522
Netherlands: 7.512
Switzerland: 7.512
Switzerland: 7.509
Iceland: 7.504
Iceland: 7.504
Iceland: 7.501


# Task 6
- group contries contries by rank

In [56]:
# group contries contries by rank
def group_countries_by_rank_ranges(data):
    # Filter data for the last 5 years
    last_5_years_data = [row for row in data if int(row[1]) >= 2019]

    # Initialize a dictionary to store countries by rank ranges
    rank_ranges = {f"{i}-{i+9}": [] for i in range(1, 151, 10)}

    # Group countries by rank ranges for the last 5 years
    for row in last_5_years_data:
        rank = row[3]
        if rank and rank!="":  # Check if rank is available and numeric
            rank = int(float(rank))
            for start_rank in range(1, 151, 10):
                end_rank = start_rank + 9
                if start_rank <= rank <= end_rank:
                    rank_ranges[f"{start_rank}-{end_rank}"].append(row[0])
                    break  # Stop checking other ranges once added

    return rank_ranges

# Example usage:
rank_groups = group_countries_by_rank_ranges(data_)

# Print the countries in each rank range for the last 5 years
for rank_range, countries in rank_groups.items():
    if countries:  # Display only non-empty rank ranges
        print(f"Rank Range {rank_range}: {countries}")


Rank Range 1-10: ['Netherlands', 'Canada', 'Denmark', 'Austria', 'New Zealand', 'Finland', 'Norway', 'Switzerland', 'Sweden', 'Iceland', 'Norway', 'Denmark', 'Iceland', 'Finland', 'Switzerland', 'Sweden', 'Austria', 'New Zealand', 'Netherlands', 'Luxembourg', 'Switzerland', 'Sweden', 'Austria', 'Finland', 'New Zealand', 'Netherlands', 'Iceland', 'Luxembourg', 'Denmark', 'Norway', 'Switzerland', 'Denmark', 'Luxembourg', 'Iceland', 'Sweden', 'Israel', 'Netherlands', 'New Zealand', 'Norway', 'Finland', 'Netherlands', 'Norway', 'Finland', 'New Zealand', 'Israel', 'Iceland', 'Luxembourg', 'Sweden', 'Switzerland', 'Denmark']
Rank Range 11-20: ['Germany', 'United States', 'Australia', 'Czechia', 'United Kingdom', 'Costa Rica', 'Israel', 'Ireland', 'Luxembourg', 'Belgium', 'Israel', 'United States', 'Germany', 'Canada', 'Costa Rica', 'Belgium', 'Ireland', 'Czechia', 'Australia', 'United Kingdom', 'Belgium', 'Costa Rica', 'Canada', 'Germany', 'Israel', 'Ireland', 'United Kingdom', 'Czechia', 'A

# Task 7
- countries_with_consecutive_lower_ranks over specific period

In [62]:
# countries_with_consecutive_lower_ranks over specific period
def countries_with_consecutive_lower_ranks(data, consecutive_years):
    countries = set()
    for i in range(len(data)):
        lower_count = 0
        for j in range(consecutive_years):
            # Check for missing or non-numeric rank values
            if i + j < len(data) - 1 and data[i + j][3] and data[i + j + 1][3]:
                if data[i + j][3]!="" and data[i + j + 1][3]!="":
                    if int(float(data[i + j][3])) > int(float(data[i + j + 1][3])):
                        lower_count += 1
                    else:
                        break  # Reset count if ranks are not consecutive
                else:
                    break  # Reset count if rank values are not numeric
            else:
                break  # Reset count if rank values are missing

        if lower_count == consecutive_years - 1:  # Check if consecutive lower ranks occurred
            countries.add(data[i][0])

    return list(countries)


consecutive_years = 3  # Set the number of consecutive years to check
countries_with_consecutive_lower = countries_with_consecutive_lower_ranks(data_, consecutive_years)

print(f"Countries with at least {consecutive_years} consecutive years of lower ranks: {countries_with_consecutive_lower}")


Countries with at least 3 consecutive years of lower ranks: ['France', 'Niger', 'India', 'Romania', 'Lebanon', 'Jamaica', 'Lesotho', 'Ukraine', 'Yemen', 'Congo Kinshasa', 'Mali', 'Turkiye', 'Bolivia', 'South Africa', 'Comoros', 'Egypt', 'Namibia', 'Haiti', 'Greece', 'Mongolia', 'Senegal', 'Ethiopia', 'Rwanda', 'Syria', 'Cyprus', 'South Korea', 'Pakistan', 'Central African Republic', 'Honduras', 'Somalia', 'Turkmenistan', 'Argentina', 'Iran', 'Burkina Faso', 'Zambia', 'Vietnam', 'Hungary', 'Angola', 'Georgia', 'Mozambique', 'Tajikistan', 'Cambodia', 'Kyrgyzstan', 'Mauritania', 'Indonesia', 'China', 'Albania', 'Brazil', 'Somaliland', 'Nigeria', 'Botswana', 'Algeria', 'Malaysia', 'Liberia', 'North Macedonia', 'Ivory Coast', 'Russia', 'Palestine', 'Slovakia', 'Bangladesh', 'Chad', 'Togo', 'Burundi', 'Congo Brazzaville', 'Taiwan', 'Kuwait', 'Dominican Republic', 'South Sudan', 'Afghanistan', 'Zimbabwe', 'Gabon', 'Madagascar', 'Armenia', 'Tanzania', 'Tunisia', 'Venezuela', 'Sierra Leone', 'G

# Task 8
- specific country details

In [81]:
# Function to extract details of a specific country from the dataset
def country_details(data, country_name):
    # Filter rows related to the specified country and ensure valid data for index and rank
    country_data = [row for row in data if row[0] == country_name and row[2] and row[3] 
                    and row[2] != '' and row[3] != '']

    # Check if country data is found or if data is missing/invalid
    if not country_data:
        print(f"Country '{country_name}' not found or missing data.")
        return None

    # Extract indexes and ranks from valid data rows
    indexes = [float(row[2]) for row in country_data if row[2].replace('.', '', 1) != ""]
    ranks = [int(float(row[3])) for row in country_data if row[3] != ""]

    # Check if extracted indexes or ranks are empty or invalid
    if not indexes or not ranks:
        print(f"Country '{country_name}' has invalid data.")
        return None

    # Calculate various statistics for the country
    avg_rank = sum(ranks) / len(ranks)
    rank_range = (min(ranks), max(ranks))
    index_range = (min(indexes), max(indexes))
    index_std_dev = (sum((index - avg_rank) ** 2 for index in indexes) / len(indexes)) ** 0.5
    highest_rank_year = country_data[ranks.index(max(ranks))][1]
    lowest_rank_year = country_data[ranks.index(min(ranks))][1]

    # Construct and return a dictionary containing country details
    return {
        'Country': country_name,
        'Average Rank': avg_rank,
        'Rank Range': rank_range,
        'Index Range': index_range,
        'Standard Deviation of Indexes': index_std_dev,
        'Year of Highest Rank': highest_rank_year,
        'Year of Lowest Rank': lowest_rank_year
    }

# Define the country name to extract details for
country_name = "Niger"  # Replace with desired country name

# Retrieve and display details if available
details = country_details(data_, country_name)
if details:
    print("Country Details:")
    for key, value in details.items():
        print(f"{key}: {value}")


Country Details:
Country: Niger
Average Rank: 121.7
Rank Range: (96, 144)
Index Range: (3.845, 5.074)
Standard Deviation of Indexes: 117.28455097539488
Year of Highest Rank: 2015
Year of Lowest Rank: 2021


In [70]:
def select_task():
    instructions = """
        Please Select from the menu to perform specific operation
        1. Top 10 happiest countries or least happiest
        2. top 3 countries that have most first positions from top and bottom
        3. Specific country with increasing its rank or decreasing its rank over specific period
        4. find list of countries
        5. countries with or above specific index value
        6. group contries contries by rank
        7. countries_with_consecutive_lower_ranks over specific period
        8. specific country details
    """
    print(instructions)
    choice = int(input("Please select 1 to 8 number to perform operations: "))
    header = data[0]
    data_ = data[1:]
    if choice==1:
        specific_year = input("Enter year: ")
        top_from_bottom = bool(input("select top_from_bottom value(True/False)"))
        top_count=int(input("Enter top count: "))

        top_countries_data = get_top_countries_by_year(data_, specific_year=specific_year, top_from_bottom=top_from_bottom, top_count=top_count, print_all=False)
        top_countries_data
    elif choice==2:
        top_from_bottom = bool(input("select top_from_bottom value(True/False)"))
        top_count=int(input("Enter top count: "))
        top_countries_data = top_3_Countries_with_most_first_positions(data_, top_from_bottom=top_from_bottom, top_count=top_count)
        print(top_countries_data)
    elif choice==3:
        country = input("Enter country name(that are in list): ")
        period = int(input("Enter period value(int 1 to 10): "))
        find_country_rank(data_, country, period)
    elif choice==4:
        dsc = bool(input("descending order?(True/False): "))
        print(list_countries(data_, dsc=dsc))
    elif choice==5:
        index_threshold = float(input("Enter threshold value(float): "))
        result_countries = countries_with_index_above(data_, index_threshold)
        for country, index in result_countries:
            print(f"{country}: {index}")
    elif choice==6:
        rank_groups = group_countries_by_rank_ranges(data_)
        # Print the countries in each rank range for the last 5 years
        for rank_range, countries in rank_groups.items():
            if countries:  # Display only non-empty rank ranges
                print(f"Rank Range {rank_range}: {countries}")
    elif choice==7:
        consecutive_years = int(input("Enter the number of consective years: "))
        countries_with_consecutive_lower = countries_with_consecutive_lower_ranks(data_, consecutive_years)

        print(f"Countries with at least {consecutive_years} consecutive years of lower ranks: {countries_with_consecutive_lower}")
    elif choice==8:
        country_name = input("Enter Country name: ")
        details = country_details(data_, country_name)
        if details:
            print("Country Details:")
            for key, value in details.items():
                print(f"{key}: {value}")
    else:
        print("Please select a valid choice!!")
select_task()


        Please Select from the menu to perform specific operation
        1. Top 10 happiest countries or least happiest
        2. top 3 countries that have most first positions from top and bottom
        3. Specific country with increasing its rank or decreasing its rank over specific period
        4. find list of countries
        5. countries with or above specific index value
        6. group contries contries by rank
        7. countries_with_consecutive_lower_ranks over specific period
        8. specific country details
    
Top 5 least happiest countries in year 2015
 [('Rwanda', 3.465), ('Benin', 3.34), ('Syria', 3.006), ('Burundi', 2.905), ('Togo', 2.839)] 


