In [17]:
import json
import yfinance as yf
import pandas as pd
import requests
import os
from datetime import datetime

energy_sector = [
    {"sector": "Oil&Gas",
     "companies": [
         {"name": "Repsol", "country": "Spain", "symbol": "REP.MC" , "data": []},
         {"name": "Exxon Mobil Corporation", "country": "USA", "symbol": "XOM", "data": []}
     ]},
    {"sector": "ElectricPower",
     "companies": [
         {"name": "Iberdrola", "country": "Spain", "symbol": "IBE.MC", "data": []},
         {"name": "NextEra Energy, Inc.", "country": "USA", "symbol": "NEE", "data": []}
     ]},
    {"sector": "RenewableEnergy",
     "companies": [
         {"name": "Acciona", "country": "Spain", "symbol": "ANA.MC", "data": []},
         {"name": "Solaria", "country": "Spain", "symbol": "SOL", "data": []},
         {"name": "Solaredge Technologies Inc.", "country": "USA", "symbol": "SEDG", "data": []}
     ]},
    {"sector": "EnergyServices",
     "companies": [
         {"name": "Técnicas Reunidas", "country": "Spain", "symbol": "TRE.MC", "data": []},
         {"name": "Honeywell International Inc.", "country": "USA", "symbol": "HON", "data": []}
     ]},
    {"sector": "Infrastructure",
     "companies": [
         {"name": "Enagas", "country": "Spain", "symbol": "ENG.MC", "data": []},
         {"name": "Kinder Morgan, Inc.", "country": "USA", "symbol": "KMI", "data": []}
     ]}
]


now_date = datetime.now()       

 
#https://ucdp.uu.se/apidocs/
base_url = "https://ucdpapi.pcr.uu.se/api/gedevents/23.1"

start_date = datetime(2000, 1, 1)  
end_date = datetime(now_date.year , now_date.month , now_date.day )

all_data = []

current_date = start_date
parameters = {
    "pagesize": 10000, 
    "format": "json",        
    "StartDate": start_date.strftime('%Y-%m-%d'),  
    "EndDate": end_date.strftime('%Y-%m-%d') 
}

response = requests.get(base_url, params=parameters)
data = json.loads(response.text)
page = 1
total_pages = data['TotalPages']

next_page = data['NextPageUrl']
if response.status_code == 200:
    all_data.extend(data['Result'])
df = pd.DataFrame(all_data)
while page <total_pages:    
    try:
        if next_page!= "":
            response = requests.get(next_page)    
            data = json.loads(response.text)
            next_page = data['NextPageUrl']
            if response.status_code == 200:                     
                all_data.extend(data['Result'])             
            else:
                print(f"Error al obtener los datos del UCDP para la fecha {current_date}.")
            
            print(f"total_pages: {total_pages} ")    
            page=page+1
            print(print(f"actual page: {page} "))
        else:
            break
        
    except:
        print("Error")
    
df = pd.DataFrame(all_data)   
df.to_csv('ucdp_data_events_2000_2024_01.csv', index=False)

base_url = "https://ucdpapi.pcr.uu.se/api/ucdpprioconflict/23.1"

end_date = datetime(now_date.year , now_date.month , now_date.day )

all_data = []

response = requests.get(base_url, params=parameters)
data = json.loads(response.text)
page = 1
total_pages = data['TotalPages']
print(f"total_pages: {total_pages} ")    

next_page = data['NextPageUrl']
if response.status_code == 200:
    all_data.extend(data['Result'])

while page < total_pages:    
    response = requests.get(next_page)    
    data = json.loads(response.text)
    next_page = data['NextPageUrl']
    if response.status_code == 200:      
       
        all_data.extend(data['Result'])
    else:
        print(f"Error al obtener los datos del UCDP para la fecha {current_date}.")
        
    page=page+1
    print(print(f"actual page: {page} "))

df = pd.DataFrame(all_data)
df.to_csv('ucdp_data_conflicts_2000_2024_01.csv', index=False)   


if not os.path.exists('docs'):
    os.makedirs('docs')

for sector in energy_sector:
    for company in sector['companies']:
        company_data = yf.download(company['symbol'], start='2000-01-01', end=now_date)        
        file_name = f"{company['name']}_data.csv"
        company_data.to_csv(os.path.join('docs', file_name))  
        company['data'] = company_data



KeyboardInterrupt



In [None]:
import pandas as pd
# Cargar los datos de conflictos y eventos desde los CSV
df_conflictos = pd.read_csv('ucdp_data_conflicts_2000_2024.csv', low_memory=False)
df_eventos = pd.read_csv('ucdp_data_events_2000_2024.csv', low_memory=False)


In [None]:
df_conflictos.head()

In [None]:
df_eventos.head()

In [None]:
columnas_a_eliminar  = df_conflictos.columns.intersection(df_eventos.columns)
columnas_a_eliminar = columnas_a_eliminar.drop(labels=['id','year'])

columnas_a_eliminar

In [None]:
df_eventos.columns

In [None]:
df_conflictos.columns

In [36]:

df_eventos = df_eventos.rename(columns={'conflict_dset_id': 'conflict_id'})
# Unir los DataFrames por la columna 'conflict_id'
df_merged = pd.merge(df_conflictos, df_eventos, on='conflict_id', how='inner')

# Verificar el DataFrame resultante
df_merged.columns

Index(['conflict_id', 'location', 'side_a_x', 'side_a_Id', 'side_a_2nd',
       'side_b_x', 'side_b_Id', 'side_b_2nd', 'incompatibility',
       'territory_name', 'year_x', 'intensity_level', 'cumulative_intensity',
       'type_of_conflict', 'start_date', 'start_prec', 'start_date2',
       'start_prec2', 'ep_end', 'ep_end_date', 'ep_end_prec', 'gwno_a',
       'gwno_a_2nd', 'gwno_b', 'gwno_b_2nd', 'gwno_loc', 'region_x', 'version',
       'id', 'relid', 'year_y', 'active_year', 'code_status',
       'type_of_violence', 'conflict_new_id', 'conflict_name', 'dyad_dset_id',
       'dyad_new_id', 'dyad_name', 'side_a_dset_id', 'side_a_new_id',
       'side_a_y', 'side_b_dset_id', 'side_b_new_id', 'side_b_y',
       'number_of_sources', 'source_article', 'source_office', 'source_date',
       'source_headline', 'source_original', 'where_prec', 'where_coordinates',
       'where_description', 'adm_1', 'adm_2', 'latitude', 'longitude',
       'geom_wkt', 'priogrid_gid', 'country', 'country_i

In [18]:
df_merged.to_csv('ucdp_data_conflicts_and_events_2000_2024_FULL.csv', index=False)   

In [19]:
df_merged.head()

Unnamed: 0,conflict_id,location,side_a_x,side_a_Id,side_a_2nd,side_b_x,side_b_Id,side_b_2nd,incompatibility,territory_name,...,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low,gwnoa,gwnob
0,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,2009-08-09T00:00:00,4,10,0,0,14,14,14,,
1,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,2009-11-15T00:00:00,1,0,0,0,1,1,1,,
2,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,2009-07-04T00:00:00,0,0,0,16,16,16,16,,
3,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,2009-07-14T00:00:00,3,15,0,0,18,18,18,,
4,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,2009-10-03T00:00:00,1,0,0,0,1,1,1,,


In [30]:
# conflict_id_counts = df_eventos['conflict_id'].value_counts()

# Crear un diccionario con los conteos de conflict_id
# conflict_id_counts_dict = conflict_id_counts.to_dict()

# Agregar una nueva columna al DataFrame df_merged con los conteos de conflict_id
# df_merged['number_of_events'] = df_merged['conflict_id'].map(conflict_id_counts_dict)
df_merged.head()

Unnamed: 0,conflict_id,location,side_a_x,side_a_Id,side_a_2nd,side_b_x,side_b_Id,side_b_2nd,incompatibility,territory_name,...,deaths_b,deaths_civilians,deaths_unknown,best,high,low,gwnoa,gwnob,conflict_id_counts,number_of_events
0,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,10,0,0,14,14,14,,,168,84
1,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,0,0,0,1,1,1,,,168,84
2,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,0,0,16,16,16,16,,,168,84
3,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,15,0,0,18,18,18,,,168,84
4,11342,India,Government of India,141,,GNLA,1163,,1,Garoland,...,0,0,0,1,1,1,,,168,84


In [42]:
selected_columns = df_merged[['conflict_id','conflict_name', 'date_start', 'date_end', 'best', 'deaths_civilians','deaths_unknown','type_of_violence','latitude', 'longitude', 'country', 'region_x']]
selected_columns.head()

Unnamed: 0,conflict_id,conflict_name,date_start,date_end,best,deaths_civilians,deaths_unknown,type_of_violence,latitude,longitude,country,region_x
0,11342,Lashkar of Mohmand tribe - TTP,2009-08-09T00:00:00,2009-08-09T00:00:00,14,0,0,2,34.46386,71.351161,Pakistan,3
1,11342,Lashkar of Mohmand tribe - TTP,2009-11-15T00:00:00,2009-11-15T00:00:00,1,0,0,2,34.814722,71.421667,Pakistan,3
2,11342,Lashkar of Mohmand tribe - TTP,2009-07-04T00:00:00,2009-07-04T00:00:00,16,0,16,2,34.46386,71.351161,Pakistan,3
3,11342,Lashkar of Mohmand tribe - TTP,2009-07-14T00:00:00,2009-07-14T00:00:00,18,0,0,2,34.594755,71.455747,Pakistan,3
4,11342,Lashkar of Mohmand tribe - TTP,2009-10-03T00:00:00,2009-10-03T00:00:00,1,0,0,2,34.8,71.466667,Pakistan,3


In [45]:
#grouped_conflicts = selected_columns.agg({
#    'date_start': 'min', 
#    'date_end': 'max',     # Selecciona la fecha de fin más tardía
#    'best': 'min',         # Suma la gravedad total del conflicto
#    'deaths_civilians': 'sum',  # Suma el número total de civiles muertos
#    'deaths_unknown': 'sum',     # Suma el número total de muertes de origen desconocido
#    'type_of_violence': 'first',  # Toma el primer valor de tipo de violencia (puede ser arbitrario)
#    'latitude': 'mean',    # Calcula la media de latitud
#    'longitude': 'mean',   # Calcula la media de longitud
#    'country': 'first',    # Toma el primer valor de país (puede ser arbitrario)
#    'region_x': 'first'      # Toma el primer valor de región (puede ser arbitrario)
#}).reset_index()

selected_columns = selected_columns.sort_values(by='date_start')
selected_columns.to_csv('docs/conflicts_events_FULL_filtered.csv', index=False)
selected_columns.head()

Unnamed: 0,conflict_id,conflict_name,date_start,date_end,best,deaths_civilians,deaths_unknown,type_of_violence,latitude,longitude,country,region_x
3027913,329,Ethiopia: Ogaden,2000-01-01T00:00:00,2000-01-23T00:00:00,0,0,0,1,7.0,44.0,Ethiopia,4
3016205,327,Angola: Government,2000-01-01T00:00:00,2000-11-10T00:00:00,0,0,0,1,-13.0,20.5,Angola,4
5158653,364,India: Kashmir,2000-01-01T00:00:00,2000-10-15T00:00:00,153,0,153,1,33.91667,76.66667,India,3
5256659,386,Algeria: Government,2000-01-01T00:00:00,2000-01-01T00:00:00,2,0,0,1,36.855833,3.866667,Algeria,4
4866206,352,Chechen Republic of Ichkeria - Civilians,2000-01-01T00:00:00,2000-01-11T00:00:00,0,0,0,3,43.3058,45.7477,Russia (Soviet Union),3


In [44]:
df_merged.describe()

Unnamed: 0,conflict_id,incompatibility,year_x,intensity_level,cumulative_intensity,type_of_conflict,start_prec,start_prec2,ep_end,ep_end_prec,...,event_clarity,date_prec,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low,gwnob
count,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,0.0,...,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,5517264.0,27725.0
mean,671.2921,1.750931,1999.114,1.52542,0.8890771,3.378281,2.923012,1.765493,0.06694278,,...,1.090623,1.191328,1.445484,2.761214,1.14008,0.9150322,6.26181,7.910367,5.924867,699.863192
std,2155.841,0.4345621,17.81678,0.4993534,0.3140367,0.4962111,2.287665,1.452775,0.2499229,,...,0.2870717,0.5126483,42.86608,19.92026,8.333316,154.3954,163.8962,169.7508,163.8417,149.442258
min,205.0,1.0,1946.0,1.0,0.0,1.0,1.0,1.0,0.0,,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,369.0
25%,289.0,2.0,1986.0,1.0,1.0,3.0,1.0,1.0,0.0,,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,770.0
50%,309.0,2.0,2003.0,2.0,1.0,3.0,1.0,1.0,0.0,,...,1.0,1.0,0.0,0.0,0.0,0.0,2.0,3.0,2.0,770.0
75%,333.0,2.0,2014.0,2.0,1.0,4.0,6.0,1.0,0.0,,...,1.0,1.0,1.0,2.0,0.0,0.0,5.0,6.0,5.0,770.0
max,15438.0,3.0,2022.0,2.0,1.0,4.0,6.0,6.0,1.0,,...,2.0,5.0,14162.0,6916.0,5964.0,75340.0,75340.0,74256.0,75482.0,800.0


In [35]:
df_eventos[df_eventos["conflict_id"]==364].count()

id                   5800
relid                5800
year                 5800
active_year          5800
code_status          5800
type_of_violence     5800
conflict_id          5800
conflict_new_id      5800
conflict_name        5800
dyad_dset_id         5800
dyad_new_id          5800
dyad_name            5800
side_a_dset_id       5800
side_a_new_id        5800
side_a               5800
side_b_dset_id       5800
side_b_new_id        5800
side_b               5800
number_of_sources    5800
source_article       5800
source_office        3649
source_date          3649
source_headline      3649
source_original      5346
where_prec           5800
where_coordinates    5800
where_description    5715
adm_1                5768
adm_2                5522
latitude             5800
longitude            5800
geom_wkt             5800
priogrid_gid         5800
country              5800
country_id           5800
region               5800
event_clarity        5800
date_prec            5800
date_start  