# Reading csv file

This notebook shows how to read a csv file used in the HadGEM3-GC3.05 perturbed parameter ensemble (PPE) with the 'pandas' package. The csv file defines the parameter perturbations used for the different PPE members. pandas reads it into a 'DataFrame' object, from which you can easily extract numpy arrays for individual parameters or ensemble members and correlate them with other arrays.


In [2]:
# import the pandas module for reading/analysing the csv file
import pandas as pd

## 1. Loading csv file into pandas

In [3]:
# Load the csv file into pandas. 'skiprows' drops file lines 1-23 (0-indexed),
# so line 0 is kept and used as the column header.
ppe_dataframe = pd.read_csv(
    "../data/GA7Parameters_fakedata.csv",
    skiprows=range(1,24),
)

# Build one RIPCODE label per row from the values in its 'Realisation',
# 'Initialisation' and 'Parameters' columns, then use the labels as the row index
ripcode_list = [
    "r{0:03d}i{1:01d}p{2:05d}".format(*rip_numbers)
    for rip_numbers in zip(*(ppe_dataframe[column].values
                             for column in ('Realisation', 'Initialisation', 'Parameters')))
]
ppe_dataframe.index = ripcode_list

# Drop the first two columns of the csv file; every later column is kept
ppe_dataframe = ppe_dataframe.iloc[:,2:]

In [4]:
# Preview the first five rows of the DataFrame (head() returns 5 rows by default)
print(ppe_dataframe.head())

              Realisation  Initialisation  Parameters  ent_fac_dp  ent_fac_md  \
r001i1p00000            1               1           0    1.016496    1.146634   
r001i1p00090            1               1          90    0.750826    1.731638   
r001i1p00605            1               1         605    1.019073    0.418173   
r001i1p00834            1               1         834    0.499738    1.795783   
r001i1p01113            1               1        1113    0.642581    1.488968   

              amdet_fac     r_det  cca_md_knob  cca_dp_knob  cca_sh_knob  ...  \
r001i1p00000   1.746170  0.570518     0.141356     0.257164     0.298939  ...   
r001i1p00090  10.903591  0.567319     0.127835     0.192985     0.646975  ...   
r001i1p00605   1.442231  0.726836     0.373661     0.078245     0.324528  ...   
r001i1p00834   5.783816  0.804711     0.121183     0.409278     0.481623  ...   
r001i1p01113   6.393224  0.897616     0.167089     0.233207     0.207252  ...   

                     r0  r

## 2. Extracting parameter values

### 2.1 For a particular parameter

In [5]:
# Select the 'ent_fac_md' column: one value per ensemble member,
# labelled by the member's RIPCODE
param = "ent_fac_md"
param_values = ppe_dataframe.loc[:, param]

# Show the selection in its pandas form
print(param_values)

r001i1p00000    1.146634
r001i1p00090    1.731638
r001i1p00605    0.418173
r001i1p00834    1.795783
r001i1p01113    1.488968
r001i1p01554    1.343862
r001i1p01649    1.833080
r001i1p01843    1.517777
r001i1p01935    1.322011
r001i1p02089    1.606675
r001i1p02123    0.599666
r001i1p02242    1.489616
r001i1p02305    0.956358
r001i1p02335    1.563581
r001i1p02491    0.678419
r001i1p02753    0.858807
r001i1p02832    1.805310
r001i1p02868    1.843024
r001i1p02884    0.877097
r001i1p02914    0.809968
Name: ent_fac_md, dtype: float64


In [6]:
# Convert to a numpy array and print it.
# to_numpy() is the accessor the pandas documentation recommends over .values
param_values_array = param_values.to_numpy()
print(param_values_array)

[1.14663421 1.73163831 0.41817268 1.79578334 1.48896775 1.3438621
 1.83308001 1.51777651 1.32201146 1.60667473 0.59966587 1.48961569
 0.95635754 1.56358111 0.67841864 0.85880689 1.80530972 1.84302414
 0.8770968  0.80996823]


### 2.2 For a particular member

In [7]:
# Select the row for member 'r001i1p00834': one entry per column of the DataFrame
member = "r001i1p00834"
member_values = ppe_dataframe.loc[member, :]

# Show the selection in its pandas form
print(member_values)

Realisation                       1
Initialisation                    1
Parameters                      834
ent_fac_dp                 0.499738
ent_fac_md                  1.79578
amdet_fac                   5.78382
r_det                      0.804711
cca_md_knob                0.121183
cca_dp_knob                0.409278
cca_sh_knob                0.481623
mparwtr                  0.00149182
qlmin                   0.000217179
gwd_frc                     5.17269
fbcd                        8.28809
gwd_fsat                   0.992424
gsharp                     0.795067
nsigma                      1.74482
g0_rp                       6.80124
ricrit_rp                   1.99212
orog_drag_param            0.427476
a_ent_1_rp                 0.383495
g1_rp                       1.02005
zhloc_depth_fac            0.724076
par_mezcla                 0.217905
lambda_min_rp                19.526
dec_thres_cloud           0.0371336
dec_thres_cu              0.0804083
forced_cu_fac               

In [8]:
# Convert to a numpy array and print it.
# to_numpy() is the accessor the pandas documentation recommends over .values.
# The row mixes numbers with a text entry, so the array holds generic objects.
member_values_array = member_values.to_numpy()
print(member_values_array)

[1 1 834 0.499737812 1.795783339 5.783815837000001 0.804710523 0.121182876
 0.409277824 0.48162338 0.0014918179999999998 0.00021717900000000002
 5.172687355 8.288094148999999 0.9924237229999999 0.79506674
 1.7448213419999998 6.801242586 1.9921189380000002 0.427476113 0.383494826
 1.020052887 0.724075977 0.217904868 19.52595548 0.037133647 0.080408287
 0.7636096529999999 0.00070629 1.4391249069999998 24111.63506 0.642786286
 0.495280059 0.602281459 0.024616039 0.156192459 0.9363726640000001
 0.916880459 1.332416534 1.247157284 0.587435046 3.172929561
 0.9574039940000001 1.676383411 6.408450285 0.9896967870000001 19.7593117
 67.26997298 115.1802206 32.14234431 0.7141012240000001
 0.09184753400000001 0.07751848900000001 2.954868254 1.240819231
 -4.03906877 'aldpp']


### 2.3 For a particular parameter and member

In [9]:
# Get parameter value 'ent_fac_md' for member 'r001i1p00834'.
# A single .loc[row, column] lookup reads the value directly, instead of first
# building the whole row as an intermediate Series and then indexing into it.
member_param_value = ppe_dataframe.loc[member, param]

# Print
print(member_param_value)

1.795783339
