In [1]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

from res_ind_lib import average_over_rp

import glob

from progress_reporter import progress_reporter

%matplotlib inline
%load_ext autoreload

%autoreload 2

In [2]:
import numpy
def wp(data, wt, percentiles,cum=False): 
	"""Compute weighted percentiles. 
	If the weights are equal, this is the same as normal percentiles. 
	Elements of the C{data} and C{wt} arrays correspond to 
	each other and must have equal length (unless C{wt} is C{None}). 

	@param data: The data. 
	@type data: A L{numpy.ndarray} array or a C{list} of numbers. 
	@param wt: How important is a given piece of data. 
	@type wt: C{None} or a L{numpy.ndarray} array or a C{list} of numbers. 
		 All the weights must be non-negative and the sum must be 
		 greater than zero. 
	@param percentiles: what percentiles to use.  (Not really percentiles, 
		 as the range is 0-1 rather than 0-100.) 
	@type percentiles: a C{list} of numbers between 0 and 1. 
	@rtype: [ C{float}, ... ] 
	@return: the weighted percentiles of the data. 
	"""
	assert numpy.greater_equal(percentiles, 0.0).all(), "Percentiles less than zero" 
	assert numpy.less_equal(percentiles, 1.0).all(), "Percentiles greater than one" 
	data = numpy.asarray(data) 
	# data = numpy.reshape(data,(len(data)))
	assert len(data.shape) == 1 
	if wt is None: 
		 wt = numpy.ones(data.shape, numpy.float) 
	else: 
		 wt = numpy.asarray(wt, numpy.float) 
		 # wt = numpy.reshape(wt,(len(wt)))
		 assert wt.shape == data.shape 
		 assert numpy.greater_equal(wt, 0.0).all(), "Not all weights are non-negative." 
	i = numpy.argsort(data) 
	sd = numpy.take(data, i, axis=0)
	sw = numpy.take(wt, i, axis=0) 
	aw = numpy.add.accumulate(sw) 
	if not aw[-1] > 0: 
		 raise ValueError("Nonpositive weight sum" )
	w = (aw)/aw[-1] 
	spots = numpy.searchsorted(w, percentiles) 
	if cum:
		sd = numpy.add.accumulate(numpy.multiply(sd,sw))
	f = interp1d(w,sd)
	return f(percentiles)
	

In [3]:
from tempfile import mkstemp, SpooledTemporaryFile
from shutil import move
from os import remove, close
from  io import StringIO

def res_to_csv(file_path, new_path=None):
    #Create temp file
    new_file =StringIO()
    
    #actually replaces text line by line
    
    data = False
    

    with open(file_path) as old_file:
        for line in old_file:
            if line.startswith("Temporalidad"): #begining of data
                data=True
                colindex = line.find("Frecuencia")

            if line.startswith("Curva"):
                break

            if data:   
                try:
                    new_file.write(line[colindex:].replace("NeuN"," 0 "))
                except IndexError:
                    print("one line omited")    
    

    
    #closes the temporary file
    return new_file


In [4]:
return_periods = np.array( [ 0.1,.5,1,1.5, 2,3, 5, 10, 20, 50, 100, 250, 500, 1000, 1500, 2000, 4000, 5000, 30e3])



In [5]:
iso2bank = pd.read_csv("inputs/iso3_to_wb_name.csv",index_col="iso3", squeeze=True)

In [6]:

res_file_list = glob.glob("D:/events/*/*.res")





In [7]:
PML_guessed = pd.DataFrame()

In [8]:
myfile = res_file_list[0]

IndexError: list index out of range

In [9]:
res_file_list

[]

In [None]:

for myfile in res_file_list:

    #parse filename
    path, name = glob.os.path.split(myfile)
    hazard = path.split("\\")[-1]
    try:
        country_name = iso2bank[name.split("_")[1]]
    except KeyError:
        print("ignored " +myfile)
        continue
    
    
    if ((hazard,country_name) in PML_guessed):
        continue
    
    progress_reporter((hazard,country_name))

    try:
        #open file and parse it to memory file
        memory_file = res_to_csv(myfile)
        #gets back to begining or IO
        memory_file.seek(0)

        #read data
        data = pd.read_csv(memory_file,sep=" *", engine="python",usecols=["Frecuencia","EP"])
        data= data.sort_values(by="EP", ascending=False)[["EP","Frecuencia"]]

        data["rp"]=1/data.Frecuencia.cumsum()
        data.head()

        #interpolates RP
        series = pd.DataFrame(interp1d(data.rp,data.EP, bounds_error=False)(return_periods), 
                              index=pd.Index(return_periods,name="rp"))

        #scales fore same average capital losses
        series *= (data.EP*data.Frecuencia).sum()/ average_over_rp(series.stack()).squeeze()

        PML_guessed[(hazard,country_name)]=series.squeeze()
    except:
        pass


In [None]:
PML_guessed.head()

In [None]:
df=PML_guessed.copy()
df.columns=pd.MultiIndex.from_tuples(PML_guessed.columns)
df=df.stack().stack()
df.index.names=["rp","country","hazard"]
df.swaplevel(0,1).swaplevel(1,2).sort_index().to_csv("intermediate/capital_losses_from_GAR_events.csv", header=True)

In [None]:
average_over_rp(df).sum()

In [None]:
sum=0

for myfile in res_file_list:

    #parse filename
    path, name = glob.os.path.split(myfile)
    hazard = path.split("\\")[-1]
    
    if hazard !="surge":
        continue

    try:
        #open file and parse it to memory file
        memory_file = res_to_csv(myfile)
        #gets back to begining or IO
        memory_file.seek(0)

        #read data
        data = pd.read_csv(memory_file,sep=" *", engine="python",usecols=["Frecuencia","EP"])
        data= data.sort_values(by="EP", ascending=False)[["EP","Frecuencia"]]   

        #scales fore same average capital losses
        sum+= (data.EP*data.Frecuencia).sum()
    except:
        print("passing "+myfile)




In [None]:
49788 + 247608

In [None]:
suma

In [None]:
sum

In [None]:
myfile = "D:/events\wind\africa_REU_Wd_Total.res"

In [None]:
path, name = glob.os.path.split(myfile)
hazard = path.split("\\")[-1]
try:
    country_name = iso2bank[name.split("_")[1]]
except KeyError:
    print("ignored " +myfile)


In [None]:
memory_file = res_to_csv(myfile)
#gets back to begining or IO
memory_file.seek(0)

#read data
data = pd.read_csv(memory_file,sep=" *", engine="python",usecols=["Frecuencia","EP"])
data= data.sort_values(by="EP", ascending=False)[["EP","Frecuencia"]]

data["rp"]=1/data.Frecuencia.cumsum()
data.head()

#interpolates RP
series = pd.DataFrame(interp1d(data.rp,data.EP, bounds_error=False)(return_periods), 
                      index=pd.Index(return_periods,name="rp"))

#scales fore same average capital losses
series *= (data.EP*data.Frecuencia).sum()/ average_over_rp(series.stack()).squeeze()

PML_guessed[(hazard,country_name)]=series.squeeze()