In [17]:
import dask.dataframe as dd
import pandas as pd
from pathlib import Path 

import numpy as np
import scipy.stats as sps
from sklearn import linear_model

In [14]:
# Directory layout of the local data tree: <root>/maven/data/sci/ngi/l2.
# Every value below is already a Path, so no re-wrapping is needed.
PATH_MAVEN = Path("/home/marek")
PATH_NGI = PATH_MAVEN.joinpath("maven", "data", "sci", "ngi")
PATH_NGI_L2 = PATH_NGI.joinpath("l2")

In [19]:
# Glob pattern matching every CSV under the 2016/01 sub-directory of the l2 tree.
test_dir = PATH_NGI_L2 / "2016" / "01" / "*.csv"

In [270]:
# Lazily load every CSV matched by the glob; the pattern is handed over as a
# plain string rather than a Path object.
ddf = dd.read_csv(str(test_dir))

In [271]:
def IO_orb(orbdata, io='I'):
    """Keep the inbound or outbound leg of a pass, split at the lowest altitude.

    The time of the row(s) with the minimum ``alt`` is used as the split
    point. If the minimum altitude occurs at several distinct ``t_unix``
    values, the first one encountered is used.

    NOTE(review): the minimum is taken over the whole frame passed in. When
    this is used through ``map_partitions`` that means one split point per
    partition -- presumably each partition holds a single orbit; confirm.

    Parameters
    ----------
    orbdata : pandas.DataFrame
        Frame with at least the columns ``alt`` and ``t_unix``.
    io : str, default 'I'
        'I' keeps rows with ``t_unix`` <= split time, 'O' keeps rows with
        ``t_unix`` > split time. Any other value returns ``orbdata`` as is.

    Returns
    -------
    pandas.DataFrame
        The selected rows. If no split point can be determined (empty frame
        or ``alt`` entirely NaN) an empty frame with the same columns is
        returned instead of raising ``IndexError``.
    """
    if io not in ('I', 'O'):
        return orbdata

    minalt = orbdata['alt'].min()
    peri_t = orbdata.loc[orbdata['alt'] == minalt, 't_unix'].unique()

    # Empty input (e.g. a partition emptied by earlier filters) or all-NaN
    # altitudes leave no row to split on; there is no leg to return.
    if len(peri_t) == 0:
        return orbdata.iloc[0:0]

    split_time = peri_t[0]
    if io == 'I':
        return orbdata[orbdata['t_unix'] <= split_time]
    return orbdata[orbdata['t_unix'] > split_time]

In [272]:
# Keep only the columns used downstream, then retain rows with a strictly
# positive abundance measured below 250 (units of `alt` as given in the files).
ddf = ddf[["orbit", "alt", "species", "abundance", "t_unix"]]
ddf = ddf[(ddf["abundance"] > 0.) & (ddf["alt"] < 250)]

In [273]:
# Apply IO_orb to every partition, explicitly selecting the 'I' leg
# (rows up to and including the time of the partition's lowest altitude).
ddf = ddf.map_partitions(IO_orb, io='I')

In [274]:
# Preview the first five rows of the filtered frame.
ddf.head(n=5)

Unnamed: 0,orbit,alt,species,abundance,t_unix
195,2441,216.3433,Ar,194.132038,1451616000.0
196,2441,215.3266,Ar,242.047639,1451616000.0
197,2441,214.3157,Ar,329.222788,1451616000.0
198,2441,213.3106,Ar,368.484369,1451616000.0
199,2441,212.3111,Ar,416.351715,1451616000.0


In [275]:
# Materialise the distinct orbit numbers present in the filtered data
# as a plain Python list.
orbs = (
    ddf["orbit"]
    .unique()
    .compute()
    .tolist()
)

In [276]:
def df_from_orb_range(daskdf, orbit, orb_step=5):
    """Collect the rows of ``daskdf`` whose orbit lies in a window around ``orbit``.

    Parameters
    ----------
    daskdf : lazy frame
        Object with an ``orbit`` column that supports boolean-mask indexing
        and ``.compute()``.
    orbit : int
        Orbit number the window is built around.
    orb_step : int, default 5
        Half-width used to build the window.

    Returns
    -------
    The result of ``.compute()`` on the selected rows.

    Notes
    -----
    The window is ``range(orbit - orb_step, orbit + orb_step)``, i.e. it
    contains ``orbit - orb_step`` but NOT ``orbit + orb_step``.
    NOTE(review): this makes the window asymmetric around ``orbit`` --
    confirm whether the upper bound was meant to be inclusive.
    """
    first_orb = orbit - orb_step
    stop_orb = orbit + orb_step  # exclusive upper bound
    wanted_orbits = list(range(first_orb, stop_orb))
    in_window = daskdf["orbit"].isin(wanted_orbits)
    return daskdf[in_window].compute()

def pivot_df(df):
    """Reshape long-format abundance rows into one column per species.

    Rows are grouped by (orbit, alt, species) and ``abundance`` is aggregated
    with ``pivot_table``'s default aggregation; the innermost index level
    (species) is then moved into the columns. The result is indexed by
    (orbit, alt) and has two-level columns ``("abundance", <species>)``.
    """
    group_keys = ["orbit", "alt", "species"]
    aggregated = df.pivot_table(index=group_keys, values=["abundance"])
    return aggregated.unstack()

def make_ratio_col(df):
    """Add an ``"N2/Ar"`` column holding the N2-to-Ar abundance ratio.

    Expects ``df["abundance"]`` to expose ``"N2"`` and ``"Ar"`` entries.
    The column is written onto ``df`` itself (the input is modified in
    place) and the same object is returned.
    """
    abundance = df["abundance"]
    numerator = abundance["N2"]
    denominator = abundance["Ar"]
    df["N2/Ar"] = numerator / denominator
    return df

def fit_ratio_alt(df):
    """Fit a straight line to log(N2/Ar) as a function of ``alt``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``alt`` and ``N2/Ar``.

    Returns
    -------
    The fitted ``linear_model.LinearRegression`` estimator.
    """
    # Double brackets keep the predictor two-dimensional (a one-column frame).
    altitude = df[["alt"]]
    log_ratio = np.log(df["N2/Ar"])
    return linear_model.LinearRegression().fit(altitude, log_ratio)

def hp_from_fit(ratio, slope, intercept):
    """Solve ``log(ratio) = slope * x + intercept`` for ``x``.

    Parameters
    ----------
    ratio : float
        Value whose natural logarithm is placed on the fitted line.
    slope, intercept : float
        Coefficients of the line.

    Returns
    -------
    The ``x`` at which the line reaches ``log(ratio)``.
    """
    log_ratio = np.log(ratio)
    return (log_ratio - intercept) / slope

In [278]:
# N2/Ar value at which the fitted line is inverted.
# NOTE(review): presumably the ratio that defines "hp" -- confirm the source
# of this number.
TARGET_RATIO = 1.25

# For each of the first five orbits: gather the neighbouring orbits, pivot to
# one column per species, fit log(N2/Ar) against altitude and report the
# altitude at which the fit reaches TARGET_RATIO.
for orb in orbs[0:5]:
    temp_df = df_from_orb_range(ddf, orb, orb_step=5)
    # Number of distinct orbits that actually contributed data to this fit.
    norbs = temp_df["orbit"].nunique()
    temp_df = pivot_df(temp_df)
    temp_df = make_ratio_col(temp_df)
    # Rows lacking either species produce a NaN ratio and are dropped.
    final_df = temp_df["N2/Ar"].reset_index().dropna(subset=["alt", "N2/Ar"])
    fit = fit_ratio_alt(final_df)
    # coef_ is a one-element array for a single predictor; index it rather
    # than calling float() on the array, which NumPy has deprecated.
    slope = float(fit.coef_[0])
    hp = hp_from_fit(TARGET_RATIO, slope, fit.intercept_)
    print(orb, hp, norbs, slope, fit.intercept_)

[2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445]
2441 92.44266790773604 4 0.028038886678739612 -2.36884593843116
[2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446]
2442 87.74570066184259 5 0.02593049960455158 -2.052146304998801
[2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448]
2444 83.92270969258767 7 0.023926715245991537 -1.7848512261723495
[2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449]
2445 85.1653131009223 8 0.023753022321350046 -1.7997900317767632
[2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449, 2450]
2446 86.05221177492734 9 0.024133110376929133 -1.8535639736289928
