In [None]:
import pandas as pd

# Define the URLs
url_female = "https://raw.githubusercontent.com/kocsigabor99/MAJOR-CROPS-FAODATA/main/UN_PPP2022_Forecast_PopulationBySingleAge_Female.csv"
url_male = "https://raw.githubusercontent.com/kocsigabor99/MAJOR-CROPS-FAODATA/main/UN_PPP2022_Forecast_PopulationBySingleAge_Male.csv"

# Read the CSV files into DataFrames
df_female = pd.read_csv(url_female)
df_male = pd.read_csv(url_male)

# Add a column to identify gender
df_female['Gender'] = 'Female'
df_male['Gender'] = 'Male'

# Merge the DataFrames
df_merged = pd.concat([df_female, df_male], ignore_index=True)

# Display the merged DataFrame
df_merged.head()

In [3]:
# Load the birth rate data
birth_rate_url = 'https://raw.githubusercontent.com/kocsigabor99/MAJOR-CROPS-FAODATA/main/UN_PPP2022_Birth%20rate_Single_Year.csv'

# Load the birth rate data with low_memory=False to avoid DtypeWarning
birth_rate_df = pd.read_csv(birth_rate_url, low_memory=False)

# Convert all columns except 'Year' and 'Region, subregion, country or area' to numeric, handling errors
for col in birth_rate_df.columns[2:]:
    birth_rate_df[col] = pd.to_numeric(birth_rate_df[col], errors='coerce')

# Drop rows with missing or non-numeric values in the birth rate columns
birth_rate_df = birth_rate_df.dropna(subset=birth_rate_df.columns[2:])

# Merge birth rate data with population data based on 'Year' and 'Region, subregion, country or area'
merged_df = pd.merge(df_female, birth_rate_df, on=['Year', 'Region, subregion, country or area'], suffixes=('', '_y'), how='inner')

# Calculate the number of births for each age group
for age in range(15, 50):  # Assuming age ranges from 15 to 49
    age_str = str(age)
    birth_rate_col = f'{age_str}_y'
    if age_str in merged_df.columns and birth_rate_col in merged_df.columns:
        merged_df[f'Births_{age}'] = merged_df[age_str] * merged_df[birth_rate_col] / 1000

# Sum up the number of births across all age groups to get total births
birth_columns = [f'Births_{age}' for age in range(15, 50) if f'Births_{age}' in merged_df.columns]
merged_df['Total_Births'] = merged_df[birth_columns].sum(axis=1)

In [4]:
# Calculate the number of pregnant women per single age group
pregnant_cols = {f'Pregnant_{age}': merged_df[f'Births_{age}'] for age in range(15, 50)}

# Extrapolate for pregnant teens aged 14 based on births in age 15
pregnant_cols['Pregnant_14'] = merged_df['Births_15']

# Calculate the number of breastfeeding women per single age group
# On average, a mother giving birth is for half a year the same age while breastfeeding as during the birth
breastfeeding_cols = {f'Breastfeeding_{age}': 0.5 * merged_df[f'Births_{age-2}'] + merged_df[f'Births_{age-1}'] + 0.5 * merged_df[f'Births_{age}'] for age in range(17, 50)}

# Special cases for ages 15 and 16
breastfeeding_cols['Breastfeeding_15'] = merged_df['Births_15']
breastfeeding_cols['Breastfeeding_16'] = merged_df['Births_15'] + merged_df['Births_16']

# Create a new DataFrame with these columns
new_cols = {**pregnant_cols, **breastfeeding_cols}
new_df = pd.DataFrame(new_cols)

# Concatenate the new DataFrame with the original DataFrame
merged_df = pd.concat([merged_df, new_df], axis=1)

# Optionally, save the updated DataFrame to a new CSV file
merged_df.to_csv('updated_birth_data.csv', index=False)

In [None]:
# Set pandas display option to show all columns
pd.set_option('display.max_columns', None)

# Filter the DataFrame to show the row for Lesotho in 2022
lesotho_2022 = merged_df[(merged_df['Year'] == 2022) & (merged_df['Region, subregion, country or area'] == 'Lesotho')]

# Display the entire row for Lesotho in 2022
print(lesotho_2022)

     Region, subregion, country or area  Year  Gender       0       1       2  \
5214                            Lesotho  2022  Female  28.048  27.343  27.136   

           3      4       5       6       7       8       9      10      11  \
5214  27.021  26.92  26.844  26.603  26.333  26.279  26.003  25.496  24.785   

          12      13      14     15      16      17      18      19      20  \
5214  24.253  24.221  24.201  23.72  23.095  22.936  23.076  23.014  22.504   

          21      22      23      24      25      26      27      28      29  \
5214  21.919  21.584  21.558  21.655  21.504  20.991  20.466  20.194  20.132   

          30      31      32      33      34      35      36      37      38  \
5214  19.979  19.616  19.245  18.898  18.388  17.535  16.636  15.909  15.137   

          39     40      41      42      43      44      45     46     47  \
5214  14.314  13.51  12.757  12.055  11.396  10.807  10.223  9.585  8.962   

        48     49     50     51     52    

In [5]:
# Getting pregnant during breastfeeding years is not accounted for 
# 2. Subtract the numbers in the 'Pregnant_14' to 'Pregnant_49' columns from the corresponding female single age groups
pregnant_columns = {f'Pregnant_{age}': age for age in range(14, 50)}
for col, age in pregnant_columns.items():
    merged_df[str(age)] -= merged_df[col] / 1000

# 3. Subtract the numbers in the 'Breastfeeding_15' to 'Breastfeeding_49' columns from the corresponding female single age groups
breastfeeding_columns = {f'Breastfeeding_{age}': age for age in range(15, 50)}
for col, age in breastfeeding_columns.items():
    merged_df[str(age)] -= merged_df[col] / 1000

# Optionally, save the updated DataFrame to a new CSV file
merged_df.to_csv('updated_nutritional_data.csv', index=False)

In [None]:
# Set pandas display option to show all columns
pd.set_option('display.max_columns', None)

# Filter the DataFrame to show the row for Lesotho in 2022
lesotho_2022 = merged_df[(merged_df['Year'] == 2022) & (merged_df['Region, subregion, country or area'] == 'Lesotho')]

# Display the entire row for Lesotho in 2022
print(lesotho_2022)

In [None]:
# Set pandas display option to show all columns
pd.set_option('display.max_columns', None)

# Filter the DataFrame to show the row for Lesotho in 2022
lesotho_2022 = merged_df[(merged_df['Year'] == 2022) & (merged_df['Region, subregion, country or area'] == 'Lesotho')]

# Display the entire row for Lesotho in 2022
print(lesotho_2022)

In [8]:
import pandas as pd

# Example DataFrame (replace with your actual DataFrame)
# merged_df = ...

# Columns to drop and save
columns_to_save = ['15_y', '16_y', '17_y', '18_y', '19_y', '20_y', '21_y', '22_y', '23_y', '24_y',
                   '25_y', '26_y', '27_y', '28_y', '29_y', '30_y', '31_y', '32_y', '33_y', '34_y',
                   '35_y', '36_y', '37_y', '38_y', '39_y', '40_y', '41_y', '42_y', '43_y', '44_y',
                   '45_y', '46_y', '47_y', '48_y', '49_y',
                   'Births_15', 'Births_16', 'Births_17', 'Births_18', 'Births_19', 'Births_20',
                   'Births_21', 'Births_22', 'Births_23', 'Births_24', 'Births_25', 'Births_26',
                   'Births_27', 'Births_28', 'Births_29', 'Births_30', 'Births_31', 'Births_32',
                   'Births_33', 'Births_34', 'Births_35', 'Births_36', 'Births_37', 'Births_38',
                   'Births_39', 'Births_40', 'Births_41', 'Births_42', 'Births_43', 'Births_44',
                   'Births_45', 'Births_46', 'Births_47', 'Births_48', 'Births_49']

# Create a new DataFrame to save the dropped columns
saved_data = merged_df[columns_to_save].copy()

# Drop the columns from merged_df
merged_df.drop(columns_to_save, axis=1, inplace=True)

In [9]:
# Adjust the merge keys as per your actual column names
merge_keys = ['Region, subregion, country or area', 'Year',]

# Perform inner merge on the common keys
merged_data = pd.merge(merged_df, df_male, on=merge_keys, suffixes=('_female', '_male'))

In [10]:
# Assuming merged_data is your DataFrame with the specified columns
merged_data = merged_data.drop(columns=['Gender_female', 'Gender_male'])

In [None]:
columns_list = merged_data.columns.tolist()
print(columns_list)

['Region, subregion, country or area', 'Year', '0_female', '1_female', '2_female', '3_female', '4_female', '5_female', '6_female', '7_female', '8_female', '9_female', '10_female', '11_female', '12_female', '13_female', '14_female', '15_female', '16_female', '17_female', '18_female', '19_female', '20_female', '21_female', '22_female', '23_female', '24_female', '25_female', '26_female', '27_female', '28_female', '29_female', '30_female', '31_female', '32_female', '33_female', '34_female', '35_female', '36_female', '37_female', '38_female', '39_female', '40_female', '41_female', '42_female', '43_female', '44_female', '45_female', '46_female', '47_female', '48_female', '49_female', '50_female', '51_female', '52_female', '53_female', '54_female', '55_female', '56_female', '57_female', '58_female', '59_female', '60_female', '61_female', '62_female', '63_female', '64_female', '65_female', '66_female', '67_female', '68_female', '69_female', '70_female', '71_female', '72_female', '73_female', '

In [12]:
# Define the age group mappings
age_groups = {
    'Male': {
        '0': ['0_male'],
        '1-3': [str(i) + '_male' for i in range(1, 4)],
        '4-8': [str(i) + '_male' for i in range(4, 9)],
        '9-13': [str(i) + '_male' for i in range(9, 14)],
        '14-18': [str(i) + '_male' for i in range(14, 19)],
        '19-50': [str(i) + '_male' for i in range(19, 51)],
        '51-70': [str(i) + '_male' for i in range(51, 71)],
        '71-100+': [str(i) + '_male' for i in range(71, 100)] + ['100+_male']
    },
    'Female': {
        '0': ['0_female'],
        '1-3': [str(i) + '_female' for i in range(1, 4)],
        '4-8': [str(i) + '_female' for i in range(4, 9)],
        '9-13': [str(i) + '_female' for i in range(9, 14)],
        '14-18': [str(i) + '_female' for i in range(14, 19)],
        '19-50': [str(i) + '_female' for i in range(19, 51)],
        '51-70': [str(i) + '_female' for i in range(51, 71)],
        '71-100+': [str(i) + '_female' for i in range(71, 100)] + ['100+_female']
    },
    'Pregnant': {
        '15-19': ['Pregnant_' + str(i) for i in range(15, 20)],
        '20-49': ['Pregnant_' + str(i) for i in range(20, 50)]
    },
    'Breastfeeding': {
        '15-19': ['Breastfeeding_' + str(i) for i in range(15, 20)],
        '20-49': ['Breastfeeding_' + str(i) for i in range(20, 50)]
    }
}

# Create a list of columns to keep based on the age group mappings
columns_to_keep = ['Region, subregion, country or area', 'Year', 'Total_Births']

for category, groups in age_groups.items():
    for age_range, columns in groups.items():
        # Aggregate columns for the current age group
        merged_data[f'{age_range}_{category}'] = merged_data[columns].sum(axis=1)
        # Drop individual columns after aggregation
        merged_data.drop(columns, axis=1, inplace=True)
        # Add aggregated column to columns_to_keep
        columns_to_keep.append(f'{age_range}_{category}')

# Filter the DataFrame to keep only the desired columns
merged_data_filtered = merged_data[columns_to_keep]

# Print the head of the filtered DataFrame
print(merged_data_filtered.head())

  Region, subregion, country or area  Year  Total_Births     0_Male  \
0                              WORLD  2022  1.334320e+08  67176.812   
1                              WORLD  2023  1.337286e+08  67255.589   
2                              WORLD  2024  1.339417e+08  67377.156   
3                              WORLD  2025  1.341781e+08  67469.978   
4                              WORLD  2026  1.343820e+08  67560.160   

     1-3_Male    4-8_Male   9-13_Male  14-18_Male   19-50_Male  51-70_Male  \
0  203633.893  352108.035  347175.416  328608.847  1822774.131  695463.489   
1  201144.888  349925.120  349071.404  332414.985  1834624.400  709314.650   
2  199880.217  346716.225  350396.930  336103.287  1847329.064  723375.321   
3  199791.272  342475.197  351340.684  339530.956  1860642.082  736787.870   
4  200094.488  337870.051  351582.450  342860.264  1875160.084  748914.886   

   ...  4-8_Female  9-13_Female   14-18_Female  19-50_Female  51-70_Female  \
0  ...  331378.299   32526

In [11]:
# Load your data
url_nutritional = 'https://raw.githubusercontent.com/kocsigabor99/MAJOR-CROPS-FAODATA/main/Micronutrient%20Needs%20by%20single%20year%20Pregnant%20and%20Breastfeeding%20included.csv'
nutritional_df = pd.read_csv(url_nutritional)

In [13]:
# Initialize a dictionary to collect results
result_dict = {
    'Region, subregion, country or area': merged_data_filtered['Region, subregion, country or area'],
    'Year': merged_data_filtered['Year']
}

# Function to add nutrient data to the result_dict
def add_nutrient_data(age_group, gender, condition=None):
    # Filter for age groups using string comparison
    nutrient_requirements = nutritional_df[
        (nutritional_df['Age'] == age_group) & 
        (nutritional_df['Gender'] == gender)
    ]
    
    if condition:
        nutrient_requirements = nutrient_requirements[
            (nutrient_requirements['Breastfeeding/Pregnant'] == condition)
        ]
    else:
        nutrient_requirements = nutrient_requirements[
            (nutritional_df['Breastfeeding/Pregnant'].isnull()) | 
            (nutritional_df['Breastfeeding/Pregnant'] == 'No')
        ]

    print(f"Age Group: {age_group}, Gender: {gender}, Condition: {condition}")
    print(f"Filtered Nutrient Requirements:\n{nutrient_requirements}\n")

    if not nutrient_requirements.empty:
        daily_requirements = nutrient_requirements.iloc[0]  # Assuming only one row matches the filter

        for nutrient in daily_requirements.index[3:]:  # Skip the first three columns (Age, Gender, Breastfeeding/Pregnant)
            if condition:
                column_name = f'{age_group}_{condition}'
            else:
                column_name = f'{age_group}_{gender.capitalize()}'
            
            # Check if column_name exists in merged_data_filtered
            if column_name in merged_data_filtered.columns:
                result_dict[f'{column_name}_{nutrient}'] = merged_data_filtered[column_name] * daily_requirements[nutrient]

# Extract age groups from merged_data column headers
age_groups_female = sorted(set([col.split('_')[0] for col in merged_data_filtered.columns if col.endswith('_Female')]))
age_groups_male = sorted(set([col.split('_')[0] for col in merged_data_filtered.columns if col.endswith('_Male')]))

# Extract special age groups for pregnant and breastfeeding women
special_age_groups = ['15-19', '20-49']
conditions = ['Pregnant', 'Breastfeeding']

# Step 1: Calculate nutrient requirements for all age groups and genders
for age_group in age_groups_female:
    print(f"Processing Female Age Group: {age_group}")
    add_nutrient_data(age_group, 'Female')

for age_group in age_groups_male:
    print(f"Processing Male Age Group: {age_group}")
    add_nutrient_data(age_group, 'Male')

# Step 2: Calculate nutrient requirements for special conditions (Pregnant and Breastfeeding)
for age_group in special_age_groups:
    for condition in conditions:
        print(f"Processing Female Age Group: {age_group} for {condition}")
        add_nutrient_data(age_group, 'Female', condition)

# Create the result_df DataFrame from result_dict
result_df = pd.DataFrame(result_dict)

# Debug output
print(result_df)

Processing Female Age Group: 0
Age Group: 0, Gender: Female, Condition: None
Filtered Nutrient Requirements:
    Age  Gender Breastfeeding/Pregnant  Vitamin A  Vitamin B1  Vitamin B2  \
101   0  Female                     No        500         0.3         0.3   

     Vitamin B3  Vitamin B5  Vitamin B6  Vitamin B7  ...  Iodine  Iron heme  \
101           4         1.8         0.3           6  ...     130         11   

     Iron (non-heme)  Magnesium  Manganese  Molybdenum  Phosporus  Potassium  \
101               22         75        0.6           3        275        860   

     Selenium  Zinc  
101        20   220  

[1 rows x 28 columns]

Processing Female Age Group: 1-3
Age Group: 1-3, Gender: Female, Condition: None
Filtered Nutrient Requirements:
Empty DataFrame
Columns: [Age, Gender, Breastfeeding/Pregnant, Vitamin A, Vitamin B1, Vitamin B2, Vitamin B3, Vitamin B5, Vitamin B6, Vitamin B7, Vitamin B9, Vitamin B12, Vitamin C, Vitamin D, Vitamin E, Vitamin K, Calcium, Copper, Iod

  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[
  nutrient_requirements = nutrient_requirements[


In [14]:
!pip install streamlit
import streamlit as st

# Define unique options for each dropdown
regions = result_df['Region, subregion, country or area'].unique()
years = result_df['Year'].unique()

# Vitamins/Minerals columns based on your dataset
vitamins_minerals = [col for col in result_df.columns if col.endswith((
    '_Vitamin A', '_Vitamin B1', '_Vitamin B2', '_Vitamin B3', '_Vitamin B5',
    '_Vitamin B6', '_Vitamin B7', '_Vitamin B9', '_Vitamin B12', '_Vitamin C',
    '_Vitamin D', '_Vitamin E', '_Vitamin K', '_Calcium', '_Copper', '_Iodine',
    '_Iron heme', '_Iron (non-heme)', '_Magnesium', '_Manganese', '_Molybdenum',
    '_Phosphorus', '_Potassium', '_Selenium', '_Zinc'
))]

# Create Streamlit sidebar widgets for selection
selected_region = st.sidebar.multiselect('Select Region', regions)
selected_year = st.sidebar.multiselect('Select Year', years)
selected_vitamin_mineral = st.sidebar.selectbox('Select Vitamin/Mineral', ['Select All'] + vitamins_minerals)

# Filtering logic based on user selections
filtered_data = result_df[
    (result_df['Region, subregion, country or area'].isin(selected_region)) &
    (result_df['Year'].isin(selected_year))
]

if selected_vitamin_mineral != 'Select All':
    filtered_data = filtered_data[['Region, subregion, country or area', 'Year', selected_vitamin_mineral]]

# Display filtered data or any other output as needed
st.write(filtered_data)

Collecting streamlit
  Downloading streamlit-1.36.0-py2.py3-none-any.whl (8.6 MB)
     ---------------------------------------- 0.0/8.6 MB ? eta -:--:--
     ---------------------------------------- 0.1/8.6 MB 2.0 MB/s eta 0:00:05
      --------------------------------------- 0.2/8.6 MB 2.3 MB/s eta 0:00:04
     - -------------------------------------- 0.3/8.6 MB 2.3 MB/s eta 0:00:04
     - -------------------------------------- 0.3/8.6 MB 2.1 MB/s eta 0:00:04
     -- ------------------------------------- 0.5/8.6 MB 2.3 MB/s eta 0:00:04
     -- ------------------------------------- 0.6/8.6 MB 2.2 MB/s eta 0:00:04
     --- ------------------------------------ 0.7/8.6 MB 2.2 MB/s eta 0:00:04
     --- ------------------------------------ 0.8/8.6 MB 2.3 MB/s eta 0:00:04
     ---- ----------------------------------- 1.0/8.6 MB 2.4 MB/s eta 0:00:04
     ----- ---------------------------------- 1.1/8.6 MB 2.5 MB/s eta 0:00:04
     ----- ---------------------------------- 1.2/8.6 MB 2.4 MB/s e


[notice] A new release of pip is available: 23.0.1 -> 24.1.2
[notice] To update, run: C:\Users\kocsi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
2024-07-09 12:02:56.620 
  command:

    streamlit run C:\Users\kocsi\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-07-09 12:02:56.620 Session state does not function when running a script without `streamlit run`


In [15]:
streamlit run /home/codespace/.local/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-07-09 08:53:46.030 Session state does not function when running a script without `streamlit run`

SyntaxError: leading zeros in decimal integer literals are not permitted; use an 0o prefix for octal integers (279552507.py, line 2)

: 