## 统计从未获奖的国家

In [14]:
import pandas as pd

# 加载CSV文件
file_path = r'2025_Problem_C_Data/summerOly_athletes.csv'
data = pd.read_csv(file_path)  # 读取CSV文件到DataFrame中

# 筛选出获得奖牌的国家（Medal列不为 "No medal"）
countries_with_medals = data[data['Medal'] != "No medal"]['NOC'].unique()

# 获取所有国家的唯一代码（NOC）
all_countries = data[['NOC', 'Team']].drop_duplicates()  # 保留唯一的NOC和Team组合

# 识别从未获得奖牌的国家（将所有国家减去获得过奖牌的国家）
countries_no_medals = all_countries[~all_countries['NOC'].isin(countries_with_medals)]

# 将结果转换为DataFrame以便更好地显示
countries_no_medals_df = countries_no_medals.rename(columns={'Team': 'Country Name'})[['NOC', 'Country Name']]

# 保存结果到CSV文件
output_path = 'countries_no_medals_with_names.csv'  # 输出文件路径
countries_no_medals_df.to_csv(output_path, index=False, encoding='utf-8')  # 保存为CSV文件

# 输出结果
print(f"从未获得奖牌的国家及名称已保存到文件：{output_path}")


从未获得奖牌的国家及名称已保存到文件：countries_no_medals_with_names.csv


## 插值

### 1.线性差值

### （1）人口

In [15]:
import pandas as pd

# 加载数据文件
file_path = 'data0/API_SP.POP.TOTL_DS2_en_csv_v2_900.csv'
data = pd.read_csv(file_path)

# 将数据从宽格式转换为长格式
data_long = data.melt(
    id_vars=data.columns[:4],  # 保留前四列作为标识
    var_name='Year',
    value_name='Population'
)

# 将'Year'列转换为整数并排序
data_long['Year'] = data_long['Year'].astype(int)
data_long = data_long.sort_values([data.columns[0], 'Year']).reset_index(drop=True)

# 按国家名称分组并进行线性插值
data_long['Population'] = data_long.groupby(data.columns[0])['Population'].transform(
    lambda group: group.interpolate(method='linear')
)

# 将数据重新转换为宽格式
data_interpolated = data_long.pivot(
    index=data.columns[:4].tolist(),
    columns='Year',
    values='Population'
).reset_index()

# 如果需要，将插值后的数据保存为新的CSV文件
output_path = r'data0/Interpolated_Population_Data.csv'
data_interpolated.to_csv(output_path, index=False)

print(f"插值后的数据已保存至：{output_path}")


插值后的数据已保存至：data0/Interpolated_Population_Data.csv


### （2）gdp

In [7]:
import pandas as pd

# 加载数据文件
file_path = 'data0/UNdata_Export_20250124_074035299.csv'
data = pd.read_csv(file_path)

# 将 'Year' 列转换为整数类型
data['Year'] = data['Year'].astype(int)

# 为每个国家生成完整的年份范围
complete_years = (
    data.groupby('Country or Area')['Year']
    .apply(lambda x: pd.Series(range(x.min(), x.max() + 1)))
    .reset_index(level=0)
    .rename(columns={0: 'Year'})
)

# 合并完整年份范围和原始数据
data_complete = pd.merge(complete_years, data, on=['Country or Area', 'Year'], how='left')

# 按国家对数据进行分组，并对缺失的 'Value' 列进行线性插值
data_complete['Value'] = data_complete.groupby('Country or Area')['Value'].transform(
    lambda group: group.interpolate(method='linear')
)

# 如果需要，可以将结果保存为 CSV 文件
output_path = 'data0/Interpolated_gdp_Data.csv'
data_complete.to_csv(output_path, index=False)

print(f"插值后的完整数据已保存至：{output_path}")


插值后的完整数据已保存至：data0/Interpolated_gdp_Data.csv


## GDP、人口归一化

In [8]:
# 导入必要的库
import pandas as pd

# 加载数据集
file_path = 'data0/interpolated_population_data.csv'
# 读取CSV文件并跳过前4行元数据
data = pd.read_csv(file_path)

# 清理数据：提取国家名称和每年人口数据
# 选择需要的列：国家列和各年份的人口数据
data_cleaned = data.iloc[:, [0] + list(range(4, data.shape[1]))].reset_index(drop=True)

# 给列命名：'Country' 和年份列（1960年到2023年）
data_cleaned.columns = ['Country'] + list(map(str, range(1960, 1960 + (data.shape[1] - 4))))

# 将人口数据转换为数值型
data_cleaned.iloc[:, 1:] = data_cleaned.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

# 计算每年全球总人口
global_population = data_cleaned.iloc[:, 1:].sum()

# 计算每个国家的人口占全球总人口的百分比
percentage_population = data_cleaned.iloc[:, 1:].div(global_population, axis=1) * 100

# 将国家名称与百分比数据合并
percentage_population = pd.concat([data_cleaned['Country'], percentage_population], axis=1)

# 输出结果：显示数据的前几行
print(percentage_population.head())

# 或者将结果保存为CSV文件
output_file_path = 'data0/population_percentage.csv'
percentage_population.to_csv(output_file_path, index=False)
print(f"结果已保存为 {output_file_path}")


                       Country      1960      1961      1962      1963  \
0                  Afghanistan  0.029644  0.029817  0.029881  0.029848   
1  Africa Eastern and Southern  0.426769  0.432120  0.435838  0.438012   
2   Africa Western and Central  0.320329  0.322652  0.323625  0.323475   
3                      Albania  0.005279  0.005371  0.005437  0.005478   
4                      Algeria  0.037485  0.037631  0.037495  0.037236   

       1964      1965      1966      1967      1968  ...      2014      2015  \
0  0.029840  0.029860  0.029877  0.029918  0.029973  ...  0.041715  0.042486   
1  0.440570  0.443417  0.446080  0.449103  0.452311  ...  0.751765  0.762430   
2  0.323466  0.323630  0.323752  0.324060  0.324459  ...  0.517730  0.525088   
3  0.005516  0.005548  0.005572  0.005598  0.005635  ...  0.003675  0.003618   
4  0.037032  0.036792  0.036640  0.036668  0.036704  ...  0.049872  0.050257   

       2016      2017      2018      2019      2020      2021      2022  \

In [11]:
import pandas as pd

# Load the dataset
file_path = 'data0/interpolated_gdp_data.csv'
data = pd.read_csv(file_path)

# Filter data for GDP values only
gdp_data = data[data['Item'] == 'Gross Domestic Product (GDP)']

# Calculate the total global GDP for each year
global_gdp = gdp_data.groupby('Year')['Value'].sum().reset_index()
global_gdp = global_gdp.rename(columns={'Value': 'Global_GDP'})

# Merge the global GDP data back with the original dataset
gdp_data_with_global = pd.merge(gdp_data, global_gdp, on='Year')

# Calculate the percentage of each country's GDP relative to the global GDP
gdp_data_with_global['GDP_Percentage'] = (gdp_data_with_global['Value'] / gdp_data_with_global['Global_GDP']) * 100

# Save the resulting dataframe to a new CSV file
output_file_path = 'data0/gdp_percentage.csv'
gdp_data_with_global.to_csv(output_file_path, index=False)

# Display the first few rows to verify the calculation
print(gdp_data_with_global.head())


  Country or Area  Year                          Item       Value  \
0     Afghanistan  1970  Gross Domestic Product (GDP)  162.642176   
1     Afghanistan  1971  Gross Domestic Product (GDP)  166.224832   
2     Afghanistan  1972  Gross Domestic Product (GDP)  141.365323   
3     Afghanistan  1973  Gross Domestic Product (GDP)  149.744069   
4     Afghanistan  1974  Gross Domestic Product (GDP)  181.598776   

      Global_GDP  GDP_Percentage  
0  201579.571187        0.080684  
1  223209.863804        0.074470  
2  259442.366248        0.054488  
3  324985.322708        0.046077  
4  410113.106734        0.044280  
