In [1]:
import oracledb, os, json
from google.cloud import secretmanager
import pandas as pd
import numpy as np
import paramiko
from io import StringIO

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.set_option('display.max_columns', None)

In [3]:
def set_secrets_as_envs():
  """Copy the team secret from Google Secret Manager into ``os.environ``.

  The secret's resource name is taken from the ``KNADA_TEAM_SECRET``
  environment variable. Its latest version is fetched, decoded as UTF-8,
  parsed as JSON and merged into the process environment.
  """
  client = secretmanager.SecretManagerServiceClient()
  latest_version = f"{os.environ['KNADA_TEAM_SECRET']}/versions/latest"
  response = client.access_secret_version(name=latest_version)
  payload_text = response.payload.data.decode('UTF-8')
  os.environ.update(json.loads(payload_text))

In [4]:
def _load_oracle_secrets():
  """Load the team secrets and return the settings this notebook uses.

  Returns:
    dict with the keys ``user``, ``password``, ``host``, ``service`` and
    ``sftpkey`` (read from the environment; ``None`` when a variable is not
    set) plus the fixed ``encoding`` / ``nencoding`` entries.
  """
  set_secrets_as_envs()
  return dict(
    user=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD'),
    host=os.getenv('DBT_ORCL_HOST'),
    service=os.getenv('DBT_ORCL_SERVICE'),
    sftpkey=os.getenv('SFTPKEY'),
    encoding="UTF-8",
    nencoding="UTF-8"
  )

# ``oracle_secrets`` is the dict the rest of the notebook reads. The loader has
# its own name so that this assignment does not overwrite the function.
oracle_secrets = _load_oracle_secrets()

In [5]:
# Find out what is on the server in the bidrag area.

keyfile = StringIO(oracle_secrets['sftpkey'])
# The key is loaded without a passphrase. paramiko expects a string or None
# here, so None is used instead of the previous np.nan.
mykey = paramiko.RSAKey.from_private_key(keyfile, password=None)

# Open a transport
host, port = "a01drvl099.adeo.no", 22
transport = paramiko.Transport((host, port))

try:
    # Auth
    username = "srv-dv-familie-airflow-sas"
    transport.connect(username=username, pkey=mykey)

    with paramiko.SFTPClient.from_transport(transport) as sftp:
        yup = sftp.listdir('./inbound/kildefiler/bidrag/')
finally:
    # Always release the SSH connection, also when auth or the listing fails.
    transport.close()

yup

SSHException: Unable to connect to a01drvl099.adeo.no: [Errno 110] Connection timed out

In [5]:
def do_berm_etl(year=2024):
    """Download the twelve monthly ``BIDRAG_BERM_M<year><mm>`` files over SFTP.

    Each file is fetched from ``./inbound/kildefiler/bidrag/`` on the SFTP
    host and written to ``../data/`` under the same file name.

    Args:
        year: Year embedded in the file names. Defaults to 2024, the value
            that used to be hard-coded.
    """
    keyfile = StringIO(oracle_secrets['sftpkey'])
    # paramiko expects a string or None as passphrase; None means "no passphrase".
    mykey = paramiko.RSAKey.from_private_key(keyfile, password=None)

    # Open a transport
    host, port = "a01drvl099.adeo.no", 22
    transport = paramiko.Transport((host, port))
    try:
        # Auth
        username = "srv-dv-familie-airflow-sas"
        transport.connect(username=username, pkey=mykey)

        # One SFTP session serves all twelve downloads; the ``with`` closes it.
        with paramiko.SFTPClient.from_transport(transport) as sftp:
            for month in range(1, 13):
                filename = f'BIDRAG_BERM_M{year}{month:02d}'  # ...01, ...02, ..., ...12
                sftp.get(f'./inbound/kildefiler/bidrag/{filename}', f'../data/{filename}')
    finally:
        # Close the transport even when authentication or a download fails.
        transport.close()

In [6]:
# Download the BERM files to ../data (needs network access to the SFTP host).
do_berm_etl()

SSHException: Unable to connect to a01drvl099.adeo.no: [Errno 110] Connection timed out

In [None]:
def do_stonad_etl(year=2024):
    """Download the twelve monthly ``BIDRAG_STONAD_M<year><mm>`` files over SFTP.

    Each file is fetched from ``./inbound/kildefiler/bidrag/`` on the SFTP
    host and written to ``../data/`` under the same file name.

    Args:
        year: Year embedded in the file names. Defaults to 2024, the value
            that used to be hard-coded.
    """
    keyfile = StringIO(oracle_secrets['sftpkey'])
    # paramiko expects a string or None as passphrase; None means "no passphrase".
    mykey = paramiko.RSAKey.from_private_key(keyfile, password=None)

    # Open a transport
    host, port = "a01drvl099.adeo.no", 22
    transport = paramiko.Transport((host, port))
    try:
        # Auth
        username = "srv-dv-familie-airflow-sas"
        transport.connect(username=username, pkey=mykey)

        # One SFTP session serves all twelve downloads; the ``with`` closes it.
        with paramiko.SFTPClient.from_transport(transport) as sftp:
            for month in range(1, 13):
                filename = f'BIDRAG_STONAD_M{year}{month:02d}'  # ...01, ...02, ..., ...12
                sftp.get(f'./inbound/kildefiler/bidrag/{filename}', f'../data/{filename}')
    finally:
        # Close the transport even when authentication or a download fails.
        transport.close()

In [None]:
# Download the STONAD files to ../data (needs network access to the SFTP host).
do_stonad_etl()

In [None]:
%%bash

# Strip NUL bytes from every monthly BIDRAG_STONAD file and write the cleaned
# copy next to the original with a .txt suffix.
for month in {1..12}; do
    # Zero-pad the month number: 1 -> 01, ..., 9 -> 09, 10 -> 10.
    num=$(printf '%02d' "$month")
    tr -d '\000' < "../data/BIDRAG_STONAD_M2024${num}" > "../data/BIDRAG_STONAD_M2024${num}.txt"
done

In [None]:
# Field layout (name, 1-based start position, width) used to slice the STONAD lines.
barn_df_stonad = pd.read_excel('posisjoner/barn_stonad_pos.xlsx', header=0).rename(
    columns={
        "Variabel-navn": "feltnavn",
        "Start-pos. i Bidrag-fila": "start_pos",
        "Antall posi-sjoner": "antall_pos",
    }
)

In [None]:
# Field layout (name, 1-based start position, width) used to slice the BERM lines.
barn_df_berm = pd.read_excel('./posisjoner/barn_berm_posisjoner.xlsx', header=0).rename(
    columns={
        "Forslag til Feltnavn": "feltnavn",
        "Start-pos. i BBM-fila": "start_pos",
        "Antall posi-sjoner": "antall_pos",
    }
)

In [None]:
# Connection settings shared by the insert functions further down.
# NOTE(review): the "[DVH_FAM_BB]" suffix looks like Oracle proxy-user syntax - confirm.
user = oracle_secrets['user'] + '[DVH_FAM_BB]'
dsn_tns = oracledb.makedsn(
    oracle_secrets['host'],
    1521,
    service_name=oracle_secrets['service'],
)

In [None]:
def hent_mottaker_berm_data(df_berm, df_barn_berm, conn=None):
    """Insert the REC_TYPE '02' records of a BERM file into FAM_BB_MOTTAKER_BERM.

    The field layout is read from ``./posisjoner/mottaker_berm_posisjoner.xlsx``
    and used to cut each field out of the raw fixed-width line.

    Args:
        df_berm: DataFrame whose column ``0`` holds the raw lines of one BERM
            file. Its ``REC_TYPE`` column is (re)written as a side effect.
        df_barn_berm: DataFrame with a ``Saksnr`` column; only records whose
            ``Saksnr`` occurs there are inserted.
        conn: Optional open Oracle connection to reuse. When omitted, a
            connection is opened from the notebook-level ``user``,
            ``oracle_secrets`` and ``dsn_tns`` and closed again afterwards.
    """
    layout = pd.read_excel('./posisjoner/mottaker_berm_posisjoner.xlsx', header=0).rename(
        columns={
            "Feltnavn": "feltnavn",
            "Start-pos. i BBM-fila": "start_pos",
            "Antall posi-sjoner": "antall_pos",
        }
    )

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '02'].copy()

    # Cut every field out of the raw line; layout start positions are 1-based.
    for _, field in layout.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_mottaker_berm = records[layout['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr is present in df_barn_berm.
    df_mottaker_berm = df_mottaker_berm[df_mottaker_berm['Saksnr'].isin(df_barn_berm['Saksnr'])]

    rows = [tuple(x) for x in df_mottaker_berm.values]
    if not rows:
        return  # nothing matched, so there is nothing to insert

    insert_sql = '''INSERT INTO FAM_BB_MOTTAKER_BERM(PERIODE,SAKSNR,REC_TYPE,FNR,VEDTDATO,SKATTFOR,INNTTYP1,INNTTYP2,INNTTYP3,INNTTYP4,INNTTYP5,KONTST
                                ,INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,BARNETILBEL,BARNETILFORS)
                                VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21)'''

    def _insert(connection):
        # Bulk-insert all rows and commit on the given connection.
        with connection.cursor() as cursor:
            cursor.executemany(insert_sql, rows)
        connection.commit()

    if conn is None:
        with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as own_conn:
            _insert(own_conn)
    else:
        _insert(conn)

In [None]:
def hent_pliktig_berm_data(df_berm, df_barn_berm, conn=None):
    """Insert the REC_TYPE '01' records of a BERM file into FAM_BB_PLIKTIGE_BERM.

    The field layout is read from ``./posisjoner/pliktig_berm_posisjoner.xlsx``
    and used to cut each field out of the raw fixed-width line.

    Args:
        df_berm: DataFrame whose column ``0`` holds the raw lines of one BERM
            file. Its ``REC_TYPE`` column is (re)written as a side effect.
        df_barn_berm: DataFrame with a ``Saksnr`` column; only records whose
            ``Saksnr`` occurs there are inserted.
        conn: Optional open Oracle connection to reuse. When omitted, a
            connection is opened from the notebook-level ``user``,
            ``oracle_secrets`` and ``dsn_tns`` and closed again afterwards.
    """
    layout = pd.read_excel('./posisjoner/pliktig_berm_posisjoner.xlsx', header=0).rename(
        columns={
            "Feltnavn": "feltnavn",
            "Start-pos. i BBM-fila": "start_pos",
            "Antall posi-sjoner": "antall_pos",
        }
    )

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '01'].copy()

    # Cut every field out of the raw line; layout start positions are 1-based.
    for _, field in layout.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_pliktig_berm = records[layout['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr is present in df_barn_berm. The copy makes
    # the column assignment below operate on an independent frame.
    df_pliktig_berm = df_pliktig_berm[df_pliktig_berm['Saksnr'].isin(df_barn_berm['Saksnr'])].copy()

    # Antbarnh uses a decimal comma (e.g. '1,5'); convert it to a float.
    df_pliktig_berm['Antbarnh'] = df_pliktig_berm['Antbarnh'].str.replace(',', '.', regex=False).astype(float)

    rows = [tuple(x) for x in df_pliktig_berm.values]
    if not rows:
        return  # nothing matched, so there is nothing to insert

    insert_sql = '''INSERT INTO FAM_BB_PLIKTIGE_BERM(PERIODE,SAKSNR,REC_TYPE,FNR,VEDTDATO,ANTBARNH,INNTTYP1,INNTTYP2,INNTTYP3
                        ,INNTTYP4,INNTTYP5,KONTST,INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,BARNETILBEL,BARNETILFORS,BIDREVNE,BOFORHOLD)
                                VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21,:22,:23)'''

    def _insert(connection):
        # Bulk-insert all rows and commit on the given connection.
        with connection.cursor() as cursor:
            cursor.executemany(insert_sql, rows)
        connection.commit()

    if conn is None:
        with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as own_conn:
            _insert(own_conn)
    else:
        _insert(conn)

In [None]:
def hent_barn_berm_data(df, i):
    """Load one monthly BERM file and insert its records into Oracle.

    REC_TYPE '03' records are sliced with the notebook-level ``barn_df_berm``
    layout and inserted into FAM_BB_BARN_BERM. The '02' and '01' records of
    the same file are then handled by ``hent_mottaker_berm_data`` and
    ``hent_pliktig_berm_data``.

    Args:
        df: DataFrame with a ``SAKSNR`` column; only BERM '03' records whose
            ``Saksnr`` occurs there are kept.
        i: Month suffix appended to ``../data/BIDRAG_BERM_M2024`` to form the
            file name (e.g. ``'01'`` or ``12``).
    """
    df_berm = pd.read_csv(f"../data/BIDRAG_BERM_M2024{i}", skip_blank_lines=True, sep='\0', header=None)

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '03'].copy()

    # Cut every field out of the raw line; layout start positions are 1-based.
    for _, field in barn_df_berm.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_barn_berm = records[barn_df_berm['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr occurs in the frame passed in as ``df``
    # (previously this read the global ``df_barn_stonad`` and ignored ``df``).
    df_barn_berm = df_barn_berm[df_barn_berm['Saksnr'].isin(df['SAKSNR'])].copy()

    # Convert e.g. '0      ' to the number 0.
    df_barn_berm['Bidrbe'] = pd.to_numeric(df_barn_berm['Bidrbe'].str.strip())

    rows = [tuple(x) for x in df_barn_berm.values]
    if rows:  # skip the database round trip when nothing matched
        with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as conn:
            with conn.cursor() as cursor:
                cursor.executemany('''INSERT INTO FAM_BB_BARN_BERM(PERIODE,SAKSNR,REC_TYPE,OBJNR,FNR,VEDTDATO,BIDRBE,BIDRBEL,BIDRTIL,BIDRES,HGBERM,UGBERM,AARSAK,SAMV,UNDERH,INNTTYP1,INNTTYP2,INNTTYP3,INNTTYP4,INNTTYP5,KONTST,
            INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,HD,STDEKN,BTBEL,BTFAK,FORPL,TELLER,NEVNER,BPDELU,VIRKDATOBB,VEDTDATOFO,VIRKDATOFO,BPPROS,BTSKODE) 
                                    VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21,:22,:23,:24,
                                    :25,:26,:27,:28,:29,:30,:31,:32,:33,:34,:35,:36,:37,:38,:39,:40,:41)''', rows)
            conn.commit()

    hent_mottaker_berm_data(df_berm, df_barn_berm)

    hent_pliktig_berm_data(df_berm, df_barn_berm)

### barn_berm data

In [None]:
%%time 

# For every month of 2024: slice the REC_TYPE '03' records of the STONAD file,
# keep those with FORSKUDD or BIDRAG_G above zero, and load the matching BERM data.
for month in range(1, 13):
    period = f"{month:02d}"  # always a two-character string: 01, 02, ..., 12
    df_stonad = pd.read_csv(f"../data/BIDRAG_STONAD_M2024{period}.txt", skip_blank_lines=True, sep='\0', header=None)

    # Characters 14-15 of every line carry the record type.
    df_stonad['REC_TYPE'] = df_stonad[0].str[13:15]
    df_rec_type_3 = df_stonad.loc[df_stonad['REC_TYPE'] == '03'].copy()

    # Field names and positions come from barn_df_stonad (the Excel layout);
    # each field is cut out of the raw BIDRAG_STONAD_M2024xx line. Start positions are 1-based.
    for _, field in barn_df_stonad.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        df_rec_type_3[field['feltnavn']] = df_rec_type_3[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_barn_stonad = df_rec_type_3[barn_df_stonad['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep rows where FORSKUDD or BIDRAG_G is greater than zero (missing counts as 0).
    keep = (
        (df_barn_stonad['FORSKUDD'].fillna(0).astype(int) > 0)
        | (df_barn_stonad['BIDRAG_G'].fillna(0).astype(int) > 0)
    )
    df_barn_stonad = df_barn_stonad.loc[keep].copy()

    # Load the BERM data for the same month, restricted to the rows kept above.
    hent_barn_berm_data(df_barn_stonad, period)

In [8]:
#### Single period (202301): builds the STONAD/BERM frames in the notebook namespace; nothing is inserted into Oracle here

df_stonad = pd.read_csv(f"../data/BIDRAG_STONAD_M202301.txt", skip_blank_lines=True, sep='\0', header=None)

# Characters 14-15 of every raw line are used as the record type; keep the '03' records.
df_stonad['REC_TYPE'] = df_stonad[0].apply(lambda x: x[13:15])
filt = (df_stonad['REC_TYPE'] == '03')
df_rec_type_3 = df_stonad.loc[filt].copy()

# Read field names and positions from barn_df_stonad (the Excel layout) and cut each field out of the raw STONAD line (start positions are 1-based)
for index, row in barn_df_stonad.iterrows():
    start_pos = row['start_pos'] - 1
    end_pos = start_pos + row['antall_pos']
    df_rec_type_3[row['feltnavn']] = df_rec_type_3[0].apply(lambda x: x[start_pos:end_pos])
    
# Keep only the columns listed in the layout's "feltnavn" column; blank fields become None
df_barn_stonad = df_rec_type_3[barn_df_stonad['feltnavn']]
df_barn_stonad = df_barn_stonad.replace(r'^\s*$', None, regex=True)

# Keep rows where FORSKUDD or BIDRAG_G is greater than zero (missing counts as 0)
filt = (((df_barn_stonad['FORSKUDD'].fillna(0).astype(int) > 0)|(df_barn_stonad['BIDRAG_G'].fillna(0).astype(int) > 0) ))
df_barn_stonad = df_barn_stonad.loc[filt].copy()


####### BERM REC_TYPE '03' records (df_barn_berm) #########

df_berm = pd.read_csv(f"../data/BIDRAG_BERM_M202301", skip_blank_lines=True, sep='\0', header=None)

df_berm['REC_TYPE'] = df_berm[0].apply(lambda x: x[13:15])
filt = (df_berm['REC_TYPE'] == '03')
df_rec_type_3 = df_berm.loc[filt].copy()

# Cut each field out of the raw BERM line using the barn_df_berm layout
for index, row in barn_df_berm.iterrows():
    start_pos = row['start_pos'] - 1
    end_pos = start_pos + row['antall_pos']
    #print(row['feltnavn'],start_pos, end_pos)
    df_rec_type_3[row['feltnavn']] = df_rec_type_3[0].apply(lambda x: x[start_pos:end_pos])
    
# Keep only the columns listed in barn_df_berm's "feltnavn" column; blank fields become None
df_barn_berm = df_rec_type_3[barn_df_berm['feltnavn']]
df_barn_berm = df_barn_berm.replace(r'^\s*$', None, regex=True)

# Keep only BERM '03' records whose Saksnr occurs in the filtered STONAD frame
df_barn_berm = df_barn_berm[df_barn_berm['Saksnr'].isin(df_barn_stonad['SAKSNR'])]

# filt is still the REC_TYPE == '03' mask computed on df_berm above; the rows left
# here were all selected with it, so this presumably only serves to take a copy.
df_barn_berm = df_barn_berm.loc[filt].copy()

# Strip padding and convert Bidrbe to a number
df_barn_berm['Bidrbe'] = pd.to_numeric(df_barn_berm['Bidrbe'].str.strip())

######## REC_TYPE '02' records (df_mottaker_berm) ##########

mottaker_df_berm = pd.read_excel('./posisjoner/mottaker_berm_posisjoner.xlsx',header=0 )
mottaker_df_berm.rename(columns={"Feltnavn": "feltnavn", "Start-pos. i BBM-fila": "start_pos", "Antall posi-sjoner": "antall_pos"}, inplace = True)

# REC_TYPE was already set on df_berm above; it is recomputed here with the same expression
df_berm['REC_TYPE'] = df_berm[0].apply(lambda x: x[13:15])
filt = (df_berm['REC_TYPE'] == '02')
df_rec_type_2 = df_berm.loc[filt].copy()

for index, row in mottaker_df_berm.iterrows():
    start_pos = row['start_pos'] - 1
    end_pos = start_pos + row['antall_pos']
    df_rec_type_2[row['feltnavn']] = df_rec_type_2[0].apply(lambda x: x[start_pos:end_pos])

# Keep only the columns listed in mottaker_df_berm's "feltnavn" column; blank fields become None
df_mottaker_berm = df_rec_type_2[mottaker_df_berm['feltnavn']]
df_mottaker_berm = df_mottaker_berm.replace(r'^\s*$', None, regex=True)

# Keep only '02' records whose Saksnr occurs in df_barn_berm
df_mottaker_berm = df_mottaker_berm[df_mottaker_berm['Saksnr'].isin(df_barn_berm['Saksnr'])]

########### REC_TYPE '01' records (df_pliktig_berm) ##########

pliktig_df_berm = pd.read_excel('./posisjoner/pliktig_berm_posisjoner.xlsx',header=0 )
pliktig_df_berm.rename(columns={"Feltnavn": "feltnavn", "Start-pos. i BBM-fila": "start_pos", "Antall posi-sjoner": "antall_pos"}, inplace = True)

# REC_TYPE is recomputed once more with the same expression
df_berm['REC_TYPE'] = df_berm[0].apply(lambda x: x[13:15])
filt = (df_berm['REC_TYPE'] == '01')
df_rec_type_1 = df_berm.loc[filt].copy()

for index, row in pliktig_df_berm.iterrows():
    start_pos = row['start_pos'] - 1
    end_pos = start_pos + row['antall_pos']
    df_rec_type_1[row['feltnavn']] = df_rec_type_1[0].apply(lambda x: x[start_pos:end_pos])

# Keep only the columns listed in pliktig_df_berm's "feltnavn" column; blank fields become None
df_pliktig_berm = df_rec_type_1[pliktig_df_berm['feltnavn']]
df_pliktig_berm = df_pliktig_berm.replace(r'^\s*$', None, regex=True)

# Keep only '01' records whose Saksnr occurs in df_barn_berm
df_pliktig_berm = df_pliktig_berm[df_pliktig_berm['Saksnr'].isin(df_barn_berm['Saksnr'])]

# Replace decimal commas with dots and convert Antbarnh to float
df_pliktig_berm['Antbarnh'] = df_pliktig_berm['Antbarnh'].str.replace(',', '.', regex=False).astype(float)




In [9]:
# Display the REC_TYPE '02' frame built in the single-period cell.
# NOTE(review): the rendered output includes the Fnr column, which appears to hold
# national identity numbers - clear the output before sharing or committing (TODO confirm).
df_mottaker_berm

Unnamed: 0,Periode,Saksnr,Rec-type,Fnr,Vedtdato,Skattfor,Innttyp1,Innttyp2,Innttyp3,Innttyp4,Innttyp5,Kontst,Inntbel1,Inntbel2,Inntbel3,Inntbel4,Inntbel5,Kontsbel,Btilred,Barnetilbel,Barnetilfors
1,202301,0000267,02,15087927641,20151216,0000000,UBAT,AG,,,,0000,00011640,00000000,00000000,00000000,00000000,0000 000,0,000 000,0
4,202301,0000485,02,21117535662,20150121,0000000,UBAT,ESBT,EFOS,FSEF,,0000,00011640,00007920,00176736,00013177,00000000,0000 000,0,000 000,0
12,202301,0000500,02,22098330864,20200513,0000000,SAK,,,,,0000,00000000,00000000,00000000,00000000,00000000,0000 000,0,000 000,0
15,202301,0000611,02,04097735218,20111028,0012208,UBAT,SAK,,,,0000,00011640,00151284,00000000,00000000,00000000,0000 000,0,000 000,0
21,202301,0002523,02,04107940001,20190118,0000000,UBAT,AT,FSEF,,,0000,00011640,00200720,00012977,00000000,00000000,0000 000,N,0007605,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159233,202301,9917661,02,10036925405,20170607,0000000,PE,,,,,0000,00293544,00000000,00000000,00000000,00000000,0000 000,0,000 000,0
159236,202301,9917764,02,11097444236,20230113,0000000,SAK,,,,,0000,00253395,00000000,00000000,00000000,00000000,0000 000,0,000 000,0
159239,202301,9917952,02,15056819247,20060607,0009912,UBAT,PE,,,,0000,00011640,00127020,00000000,00000000,00000000,0000 000,0,000 000,0
159245,202301,9918808,02,10057828035,20171109,0000000,MDOK,,,,,0000,00288500,00000000,00000000,00000000,00000000,0000 000,0,000 000,0


In [12]:
# Wrong format (Kontsbel, Barnetilbel)
# Which answers does "is this value all digits?" give for the Barnetilfors column?
df_mottaker_berm.Barnetilfors.str.isnumeric().unique()

array([ True])

In [11]:
# Rows whose Barnetilbel is not made up of digits only, and the distinct offending values.
is_digits = df_mottaker_berm['Barnetilbel'].str.isnumeric()
non_numeric_rows = df_mottaker_berm[~is_digits]
non_numeric_rows['Barnetilbel'].unique()

array(['000 000'], dtype=object)

In [13]:
# Display the REC_TYPE '01' frame built in the single-period cell.
# NOTE(review): the rendered output includes the Fnr column, which appears to hold
# national identity numbers - clear the output before sharing or committing (TODO confirm).
df_pliktig_berm

Unnamed: 0,Periode,Saksnr,Rec-type,Fnr,Vedtdato,Antbarnh,Innttyp1,Innttyp2,Innttyp3,Innttyp4,Innttyp5,Kontst,Inntbel1,Inntbel2,Inntbel3,Inntbel4,Inntbel5,Kontsbel,Btilred,Barnetilbel,Barnetilfors,Bidrevne,Boforhold
2,202301,0000267,01,10027019787,20151216,00,AG,,,,,,00180180,00000000,00000000,00000000,00000000,00000000,N,0024570,,0000000,00
5,202301,0000485,01,07087549792,20150121,00,SAK,,,,,,00183820,00000000,00000000,00000000,00000000,00000000,N,0000585,,0000000,00
13,202301,0000500,01,10058322712,20200513,00,PE,,,,,,00287592,00000000,00000000,00000000,00000000,00000000,N,0003329,,0000576,00
16,202301,0000611,01,01076613326,20111028,00,SAK,,,,,,00000000,00000000,00000000,00000000,00000000,00000000,,0000000,,0000000,00
22,202301,0002523,01,14077634151,20190118,00,PE,,,,,,00322032,00000000,00000000,00000000,00000000,00000000,N,0041977,,0002917,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159234,202301,9917661,01,06126125702,20170607,00,MDOK,,,,,,00051282,00000000,00000000,00000000,00000000,00000000,,0000000,,0000000,00
159237,202301,9917764,01,07096129755,20230113,00,PE,,,,,,00391308,00000000,00000000,00000000,00000000,00000000,N,0003621,,0004390,00
159240,202301,9917952,01,24036824375,20060607,00,MDOK,,,,,,00220000,00000000,00000000,00000000,00000000,00000000,,0000000,,0003433,00
159246,202301,9918808,01,10106621328,20171109,00,EVNE,,,,,,00230800,00000000,00000000,00000000,00000000,00000000,,0000000,,0000000,00


In [23]:
# Wrong format (Antbarnh)
# Which answers does "is this value all digits?" give for the Bidrevne column?
df_pliktig_berm.Bidrevne.str.isnumeric().unique()

array([ True])

In [15]:
# Rows whose Inntbel1 is not made up of digits only, and the distinct offending values.
is_digits = df_pliktig_berm['Inntbel1'].str.isnumeric()
non_numeric_rows = df_pliktig_berm[~is_digits]
non_numeric_rows['Inntbel1'].unique()

array(['0,0', '1,0', '2,0', '3,0', '0,5', '1,5', '4,0', '5,0', '2,5',
       '7,0', '6,0', '3,5', '9,0'], dtype=object)

In [None]:
def hent_barn_berm_data(df, i):
    """Load one monthly 2023 BERM file and insert its records into Oracle.

    NOTE(review): this redefines ``hent_barn_berm_data`` from earlier in the
    notebook; the only intended difference appears to be the 2023 file name -
    confirm which definition should remain.

    REC_TYPE '03' records are sliced with the notebook-level ``barn_df_berm``
    layout and inserted into FAM_BB_BARN_BERM. The '02' and '01' records of
    the same file are then handled by ``hent_mottaker_berm_data`` and
    ``hent_pliktig_berm_data``.

    Args:
        df: DataFrame with a ``SAKSNR`` column; only BERM '03' records whose
            ``Saksnr`` occurs there are kept.
        i: Month suffix appended to ``../data/BIDRAG_BERM_M2023`` to form the
            file name (e.g. ``'01'`` or ``12``).
    """
    df_berm = pd.read_csv(f"../data/BIDRAG_BERM_M2023{i}", skip_blank_lines=True, sep='\0', header=None)

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '03'].copy()

    # Cut every field out of the raw line; layout start positions are 1-based.
    for _, field in barn_df_berm.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_barn_berm = records[barn_df_berm['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr occurs in the frame passed in as ``df``
    # (previously this read the global ``df_barn_stonad`` and ignored ``df``).
    df_barn_berm = df_barn_berm[df_barn_berm['Saksnr'].isin(df['SAKSNR'])].copy()

    # Convert e.g. '0      ' to the number 0.
    df_barn_berm['Bidrbe'] = pd.to_numeric(df_barn_berm['Bidrbe'].str.strip())

    rows = [tuple(x) for x in df_barn_berm.values]
    if rows:  # skip the database round trip when nothing matched
        with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as conn:
            with conn.cursor() as cursor:
                cursor.executemany('''INSERT INTO FAM_BB_BARN_BERM(PERIODE,SAKSNR,REC_TYPE,OBJNR,FNR,VEDTDATO,BIDRBE,BIDRBEL,BIDRTIL,BIDRES,HGBERM,UGBERM,AARSAK,SAMV,UNDERH,INNTTYP1,INNTTYP2,INNTTYP3,INNTTYP4,INNTTYP5,KONTST,
            INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,HD,STDEKN,BTBEL,BTFAK,FORPL,TELLER,NEVNER,BPDELU,VIRKDATOBB,VEDTDATOFO,VIRKDATOFO,BPPROS,BTSKODE) 
                                    VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21,:22,:23,:24,
                                    :25,:26,:27,:28,:29,:30,:31,:32,:33,:34,:35,:36,:37,:38,:39,:40,:41)''', rows)
            conn.commit()

    # The helpers are called without a connection: ``conn`` is already closed
    # once the ``with`` block above has ended, and ``hent_mottaker_berm_data``
    # is defined with two parameters.
    hent_mottaker_berm_data(df_berm, df_barn_berm)

    hent_pliktig_berm_data(df_berm, df_barn_berm)

In [None]:
def hent_pliktig_berm_data(df_berm, df_barn_berm, conn=None):
    """Insert the REC_TYPE '01' records of a BERM file into FAM_BB_PLIKTIGE_BERM.

    NOTE(review): this redefines ``hent_pliktig_berm_data`` from earlier in
    the notebook. Layout header, column names, Antbarnh conversion and table
    name have been aligned with that earlier definition; the table name was
    ``FAM_BB_PLIKTIG_BERM`` here before - confirm which one exists.

    Args:
        df_berm: DataFrame whose column ``0`` holds the raw lines of one BERM
            file. Its ``REC_TYPE`` column is (re)written as a side effect.
        df_barn_berm: DataFrame with a ``Saksnr`` column; only records whose
            ``Saksnr`` occurs there are inserted.
        conn: Optional open Oracle connection to reuse. When omitted, a
            connection is opened from the notebook-level ``user``,
            ``oracle_secrets`` and ``dsn_tns`` and closed again afterwards.
    """
    layout = pd.read_excel('./posisjoner/pliktig_berm_posisjoner.xlsx', header=0).rename(
        columns={
            "Feltnavn": "feltnavn",
            "Start-pos. i BBM-fila": "start_pos",
            "Antall posi-sjoner": "antall_pos",
        }
    )

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '01'].copy()

    # Cut every field out of the raw line; layout start positions are 1-based.
    for _, field in layout.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_pliktig_berm = records[layout['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr is present in df_barn_berm. The sliced
    # frames use the column name 'Saksnr', not 'SAKSNR'.
    df_pliktig_berm = df_pliktig_berm[df_pliktig_berm['Saksnr'].isin(df_barn_berm['Saksnr'])].copy()

    # Antbarnh uses a decimal comma (e.g. '1,5'); convert it to a float.
    df_pliktig_berm['Antbarnh'] = df_pliktig_berm['Antbarnh'].str.replace(',', '.', regex=False).astype(float)

    rows = [tuple(x) for x in df_pliktig_berm.values]
    if not rows:
        return  # nothing matched, so there is nothing to insert

    # Column names are plain identifiers: single-quoted names are string
    # literals in SQL and are not valid in an INSERT column list.
    insert_sql = '''INSERT INTO FAM_BB_PLIKTIGE_BERM(PERIODE,SAKSNR,REC_TYPE,FNR,VEDTDATO,ANTBARNH,INNTTYP1,INNTTYP2,INNTTYP3
                    ,INNTTYP4,INNTTYP5,KONTST,INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,BARNETILBEL,BARNETILFORS,BIDREVNE,BOFORHOLD)
                            VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21,:22,:23)'''

    def _insert(connection):
        # Bulk-insert all rows and commit on the given connection.
        with connection.cursor() as cursor:
            cursor.executemany(insert_sql, rows)
        connection.commit()

    if conn is None:
        with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as own_conn:
            _insert(own_conn)
    else:
        _insert(conn)
       

In [9]:
# Which answers does "is this value all digits?" give for the Bidrbe column?
# Uses the .str accessor, so Bidrbe must still hold strings when this runs.
df_barn_berm.Bidrbe.str.isnumeric().unique()

array([ True, False])

In [None]:
# Rows whose Inntbel5 is not made up of digits only, and the distinct offending values.
is_digits = df_barn_berm['Inntbel5'].str.isnumeric()
non_numeric_rows = df_barn_berm[~is_digits]
non_numeric_rows['Inntbel5'].unique()

In [15]:
# Show the rows whose Bidrbe differs from the literal value '000 0000'.
filt = df_barn_berm['Bidrbe'].ne('000 0000')
df_barn_berm.loc[filt]

Unnamed: 0,Periode,Saksnr,Rec-type,Objnr,Fnr,Vedtdato,Bidrbe,Bidrbel,Bidrtil,Bidres,HGBERM,UGBERM,Aarsak,Samv,Underh,Innttyp1,Innttyp2,Innttyp3,Innttyp4,Innttyp5,KONTST,Inntbel1,Inntbel2,Inntbel3,Inntbel4,Inntbel5,Kontsbel,Btilred,HD,Stdekn,Btbel,Btfak,Forpl,Teller,Nevner,Bpdelu,VIRKDATOBB,VEDTDATOFO,VIRKDATOFO,BPPROS,BTSKODE
2296,202302,500498,3,3,3030478478,20220916,5580,5580,,KBB,18,S3,S,0,11094,KAPS,AG,0,0,0,0,497,74858,000 0000,0,,,,,,4980303,,,0,0,5580,20220701,20220209.0,20220301.0,50,
69104,202302,1309733,3,3,17120496972,20230125,6512,6510,,KBB,18,S2,D,0,11094,KAPS,AG,0,0,0,0,576,11675,000 0000,0,,,,,,7330303,,,0,0,6512,20230101,20180127.0,20180301.0,58,
87188,202302,1504418,3,3,3030191896,20220926,5669,5670,,KBB,18,K1,S,0,11094,KAPS,SAK,0,0,0,0,27621,55800,000 0000,0,,,,,,4180303,,,0,0,5669,20220701,,,51,
110680,202302,1703341,3,3,29050490082,20221005,6756,6760,,KBB,18,S3,D,0,11094,KAPS,AG,0,0,0,0,256,124656,000 0000,0,,,,,,3410303,,,0,0,6756,20220901,20220228.0,20211101.0,60,
156730,202302,2209135,3,3,11010285021,20230221,5614,5610,,KBB,18,S3,S,0,11094,KAPS,SAK,0,0,0,0,48730,5943,000 0000,0,,,,,,1350303,,,0,0,5614,20230101,,,50,


In [None]:
# Strip surrounding whitespace from Bidrbe and convert it to integers.
# NOTE(review): .str only works while Bidrbe holds strings. The single-period cell
# already converts the column with pd.to_numeric, after which this line raises.
df_barn_berm['Bidrbe'] = df_barn_berm['Bidrbe'].str.strip()  # Remove leading/trailing spaces
df_barn_berm['Bidrbe'] = df_barn_berm['Bidrbe'].astype(int)#replace('0', '0.0')  # Raises if any value is not an integer string

# Alternative that was tried: coerce unparsable values to NaN and replace them with 0
#df_barn_berm['Bidrbe'] = pd.to_numeric(df_barn_berm['Bidrbe'], errors='coerce').fillna(0)

# Check the result
df_barn_berm.Bidrbe.unique()


In [25]:
def do_tl():
    """Slice the REC_TYPE '03' records of the 202301 BERM file and insert them into FAM_BB_BARN_BERM.

    Uses the notebook-level ``barn_df_berm`` layout and ``df_barn_stonad``
    frame; opens its own Oracle connection.
    """
    user = oracle_secrets['user'] + '[DVH_FAM_BB]'
    dsn_tns = oracledb.makedsn(oracle_secrets['host'], 1521, service_name=oracle_secrets['service'])

    # NOTE(review): other cells read from '../data/'; this path is relative to
    # the working directory itself - confirm which one is intended.
    df_berm = pd.read_csv("data/BIDRAG_BERM_M202301", skip_blank_lines=True, sep='\0', header=None)

    # Characters 14-15 of every line carry the record type.
    df_berm['REC_TYPE'] = df_berm[0].str[13:15]
    records = df_berm.loc[df_berm['REC_TYPE'] == '03'].copy()

    # ``barn_df`` is not defined anywhere in this notebook; the layout for the
    # BERM file is ``barn_df_berm``. Layout start positions are 1-based.
    for _, field in barn_df_berm.iterrows():
        start = int(field['start_pos']) - 1
        stop = start + int(field['antall_pos'])
        records[field['feltnavn']] = records[0].str[start:stop]

    # Keep only the layout columns, in layout order; blank fields become None.
    df_barn = records[barn_df_berm['feltnavn']].replace(r'^\s*$', None, regex=True)

    # Keep only records whose Saksnr occurs in df_barn_stonad.
    df_barn = df_barn[df_barn['Saksnr'].isin(df_barn_stonad['SAKSNR'])].copy()

    rows = [tuple(x) for x in df_barn.values]
    if not rows:
        return  # nothing matched, so there is nothing to insert

    with oracledb.connect(user=user, password=oracle_secrets['password'], dsn=dsn_tns) as conn:
        with conn.cursor() as cursor:
            cursor.executemany('''INSERT INTO FAM_BB_BARN_BERM(PERIODE,SAKSNR,REC_TYPE,OBJNR,FNR,VEDTDATO,BIDRBE,BIDRBEL,BIDRTIL,BIDRES,HGBERM,UGBERM,AARSAK,SAMV,UNDERH,INNTTYP1,INNTTYP2,INNTTYP3,INNTTYP4,INNTTYP5,KONTST,
    INNTBEL1,INNTBEL2,INNTBEL3,INNTBEL4,INNTBEL5,KONTSBEL,BTILRED,HD,STDEKN,BTBEL,BTFAK,FORPL,TELLER,NEVNER,BPDELU,VIRKDATOBB,VEDTDATOFO,VIRKDATOFO,BPPROS,BTSKODE) 
                                        VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,:20,:21,:22,:23,:24,
                                        :25,:26,:27,:28,:29,:30,:31,:32,:33,:34,:35,:36,:37,:38,:39,:40,:41)''', rows)
        conn.commit()
   

In [44]:
# Run the single-file load into FAM_BB_BARN_BERM (needs Oracle access).
do_tl()

In [4]:
import os

# Delete every regular file directly inside ../data; sub-directories are left alone.
data_dir = '../data'
for entry in os.listdir(data_dir):
    target = os.path.join(data_dir, entry)
    if os.path.isfile(target):
        os.remove(target)