In [20]:
import pathlib
import numpy as np
import pandas as pd


In [21]:
# Let pandas render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

In [22]:
# Fields to load; each name is used as a directory name under `path`.
field_names = """
    Валынтойское
    Вынгаяхинское
    Крайнее
    Отдельное
    Романовское
    Холмогорское
""".split()

# Table names; each one is used to build the file name read for every field.
tables = (
    'fond frac gdis merop mersum sh '
    'sh_sost_fond sost sppl troil welllist wellplast'
).split()

# Root directory of the data: the current working directory.
path = pathlib.Path.cwd()


In [23]:
# Load every table of every field into a nested dict: data[field_name][table].
# The 'gdis' table is read from an Excel workbook ('gdis.xlsm'); every other
# table is read from '<table>.feather' in the field's directory under `path`.
data = {}
for field_name in field_names:
    data[field_name] = {}
    for table in tables:
        if table == 'gdis':
            df = pd.read_excel(path / field_name / 'gdis.xlsm')
        else:
            df = pd.read_feather(path / field_name / f'{table}.feather')
        data[field_name][table] = df


In [24]:
# For each field, print the dtype and the distinct values of the 'Lэфф,м'
# column of its 'gdis' table, with a blank line after each printed item.
for field_name in field_names:
    leff = data[field_name]['gdis']['Lэфф,м']
    print(field_name, leff.dtypes, end='\n\n')
    print(leff.unique(), end='\n\n')


Валынтойское float64

[ nan 704.]

Вынгаяхинское float64

[ nan 349. 152. 233. 750. 797. 540. 675. 560. 200. 561. 740. 284. 638.
 290. 660. 178. 640. 260. 352.  63. 168. 156. 293. 644. 556. 587. 664.
 882. 160.  84. 298. 527. 709. 412. 174. 259.]

Крайнее float64

[ nan 200. 202. 213. 100. 323. 313. 226. 456. 590. 624. 560. 500. 497.
 477. 287. 583. 212. 270. 558. 487. 585.]

Отдельное float64

[  nan 1310.  875.  594.  320. 1140.]

Романовское float64

[ nan 661. 260. 653. 170. 300. 650. 186. 400. 500. 671. 480. 275. 600.
 507. 312.]

Холмогорское float64

[ nan 550. 244. 199. 290. 130. 150. 154. 285.]



In [19]:
# Find the rows of the 'Вынгаяхинское' gdis table whose 'Lэфф,м' entry is one
# of the listed non-numeric strings, and show the well next to the value.
# NOTE(review): this cell's execution count is older than the dtype report
# cell, whose output lists the column as float64 -- the saved output here
# looks stale; re-run to confirm.
df = data['Вынгаяхинское']['gdis']
wrong_values = ['~200', '215 - 246']
is_wrong = df['Lэфф,м'].isin(wrong_values)
shown_columns = ['Скважина', 'Lэфф,м']
df.loc[is_wrong, shown_columns]


Unnamed: 0,Скважина,"Lэфф,м"
754,286_407G,~200
1334,286_998G2,215 - 246


In [None]:
# Display the 'welllist' table loaded for the 'Крайнее' field.
data['Крайнее']['welllist']

In [None]:
# Display the 'wellplast' table loaded for the 'Крайнее' field.
data['Крайнее']['wellplast']

In [None]:
# Display the 'sppl' table loaded for the 'Крайнее' field.
data['Крайнее']['sppl']

In [None]:
# Stack the 'sppl' tables of all fields into one frame, tagging every row with
# the field it came from. `assign` returns a new frame, so the tables cached
# in `data` are not modified (no 'field_name' column is written into them).
dfs = [
    data[field_name]['sppl'].assign(field_name=field_name)
    for field_name in field_names
]
# The per-field index labels are kept as they are (no ignore_index), so labels
# may repeat across fields. Every 0 in the combined frame becomes NaN.
# NOTE(review): presumably 0 encodes "no value" in these tables -- confirm.
df_sppl = pd.concat(dfs).replace(0, np.nan)
df_sppl


In [None]:
# Column-wise median of the selected 'sppl' parameter columns.
params = ['pm', 'nb', 'sp', 'sn', 'sw', 'hs']
df_sppl.loc[:, params].median()


In [None]:
# Stack the 'troil' tables of all fields into one frame, tagging every row
# with the field it came from. `assign` returns a new frame, so the tables
# cached in `data` are not modified.
dfs = [
    data[field_name]['troil'].assign(field_name=field_name)
    for field_name in field_names
]
df_troil = pd.concat(dfs, ignore_index=True)

# Non-mutating chain (no inplace calls on a filtered frame):
#   1. drop rows whose 'well.ois' is 0,
#   2. turn every remaining 0 into NaN,
#   3. order rows by well and then by 'dt'.
df_troil = (
    df_troil
    .loc[df_troil['well.ois'] != 0]
    .replace(0, np.nan)
    .sort_values(by=['well.ois', 'dt'])
)

df_troil


In [None]:
# Number of non-NA values in each column of df_troil.
df_troil.count()

In [None]:
# Inspect the dtype of the 'grpdate' column.
df_troil['grpdate'].dtype

In [None]:
# Rows whose 'skvtype' is an empty string.
df_troil.loc[df_troil['skvtype'] == '']

In [None]:
# All df_troil rows for the well whose 'well.ois' equals 2560617200.
df_troil.loc[df_troil['well.ois'] == 2560617200]

In [None]:
# Most frequent 'skvtype' value among the rows of well 2560617200
# (value_counts tallies each value, idxmax returns the one with the top count).
df_troil.loc[df_troil['well.ois'] == 2560617200, 'skvtype'].value_counts().idxmax()

In [None]:
# Column-wise median of the selected 'troil' parameter columns.
params = ['mu_liq', 'ob_kt', 'oilsaturatedthickness']
df_troil.loc[:, params].median()


In [None]:
# Rows of the 'Крайнее' 'merop' table for the well whose 'well.ois' equals
# 2560617200.
df_merop = data['Крайнее']['merop']
df_merop[df_merop['well.ois'].eq(2560617200)]


In [None]:
# Display the 'sh_sost_fond' table loaded for the 'Крайнее' field.
data['Крайнее']['sh_sost_fond']

In [None]:
# Column dtypes of the 'sh_sost_fond' table of the 'Крайнее' field.
data['Крайнее']['sh_sost_fond'].dtypes

In [None]:
# Take the 'sh_sost_fond' rows of one well (well.ois == 2560002200), index them
# by 'dt', and check which Python type the index labels have.
df_sh_sost_fond = data['Крайнее']['sh_sost_fond']
df_sh_sost_fond_well = (
    df_sh_sost_fond[df_sh_sost_fond['well.ois'] == 2560002200]
    .set_index(keys=['dt'])
)
type(df_sh_sost_fond_well.index[0])


In [None]:
# Display the 'sh' table of the 'Холмогорское' field ordered by 'well.ois'
# (sort_values returns a sorted copy; the cached table is not reordered).
data['Холмогорское']['sh'].sort_values(by='well.ois')

In [None]:
# Display the 'mersum' table loaded for the 'Крайнее' field.
data['Крайнее']['mersum']