## Импорты и загрузка данных

In [3]:
import numpy as np
import pandas as pd

import os

In [109]:
# Base directory: the process working directory, which is expected to contain
# the `data` folder.
path = os.getcwd()

# Tables from data/db1/ref are ';'-separated.
# Paths are assembled with os.path.join instead of hard-coded backslashes so
# the notebook also runs outside Windows.
# `on_bad_lines='warn'` replaces `error_bad_lines=False` (deprecated in
# pandas 1.3, removed in 2.0): malformed rows are reported and skipped.
ref_dir = os.path.join(path, 'data', 'db1', 'ref')
fish_df = pd.read_csv(
    os.path.join(ref_dir, 'fish.csv'), on_bad_lines='warn', sep=';')
prod_designate_df = pd.read_csv(
    os.path.join(ref_dir, 'prod_designate.csv'), on_bad_lines='warn', sep=';')
prod_type_df = pd.read_csv(
    os.path.join(ref_dir, 'prod_type.csv'), on_bad_lines='warn', sep=';')
regime_df = pd.read_csv(
    os.path.join(ref_dir, 'regime.csv'), on_bad_lines='warn', sep=';')
region_df = pd.read_csv(
    os.path.join(ref_dir, 'region.csv'), on_bad_lines='warn', sep=';')


# Catch records of db1 are ','-separated.
catch_df = pd.read_csv(
    os.path.join(path, 'data', 'db1', 'catch.csv'), on_bad_lines='warn', sep=',')

# Name substitution + normalisation.
# Fish names keyed by `id_fish`, built once so that each lookup is a dict
# access instead of a full scan of `fish_df`. For a repeated id the first row
# wins, which is the same row `.values[0]` on the filtered frame would pick.
_fish_name_by_id = (
    fish_df.drop_duplicates('id_fish').set_index('id_fish')['fish'].to_dict()
)


def spoofing(id):
    """Return the `fish` value of the `fish_df` row whose `id_fish` equals `id`.

    The parameter keeps the name `id` (shadowing the builtin only inside this
    function) so that existing calls remain valid.

    Raises:
        KeyError: if `id` does not occur in `fish_df['id_fish']`.
    """
    return _fish_name_by_id[id]


# Attach the fish name to every catch record.
catch_df['fish'] = catch_df['id_fish'].apply(spoofing)
# Scale the volumes by 1000 (presumably a unit conversion - TODO confirm).
# NOTE: this rewrites the column in place, so running the statement again
# without reloading `catch_df` scales the values a second time.
catch_df['catch_volume'] = catch_df['catch_volume'] * 1000

# Paths are assembled with os.path.join instead of hard-coded backslashes, and
# `on_bad_lines='warn'` replaces `error_bad_lines=False` (deprecated in
# pandas 1.3, removed in 2.0): malformed rows are reported and skipped.
product_df = pd.read_csv(
    os.path.join(path, 'data', 'db1', 'product.csv'), on_bad_lines='warn', sep=',')

ext1 = pd.read_csv(
    os.path.join(path, 'data', 'db2', 'Ext.csv'), on_bad_lines='warn', sep=',')
# Keep only rows whose `id_fishery` is not -1 (presumably -1 marks records
# without a linked fishery - TODO confirm).
ext1 = ext1[ext1['id_fishery'] != -1]
# Columns that the later cells do not read.
ext1 = ext1.drop(columns=['Name_Plat', 'Product_period', 'Region_Plat', 'numPart'])

ext2 = pd.read_csv(
    os.path.join(path, 'data', 'db2', 'Ext2.csv'), on_bad_lines='warn', sep=',')

In [172]:
def calculatingSummOfFish(fish_name, date_start, date_end):
    """Sum the catches of one fish per (vessel, owner) pair within a period.

    Reads the module-level `catch_df` and uses its columns `fish`, `date`,
    `id_ves`, `id_own` and `catch_volume`.

    Args:
        fish_name: value compared for equality with `catch_df['fish']`.
        date_start: lower bound of the period, inclusive.
        date_end: upper bound of the period, inclusive.

    Both bounds are compared directly with `catch_df['date']`, so they must be
    comparable with the values of that column; a failed comparison propagates
    to the caller instead of being swallowed. The bounds are inclusive so that
    passing the first and last date of the data really selects the whole
    period.

    Returns:
        pandas.DataFrame with the columns `total_catches`, `ship` and `owner`,
        one row per (vessel, owner) pair whose summed `catch_volume` is
        greater than zero, ordered by vessel and then owner. The frame is
        empty (same columns) when nothing matches.
    """
    print('Processing incoming values')

    in_scope = (
        (catch_df['fish'] == fish_name)
        & (catch_df['date'] >= date_start)
        & (catch_df['date'] <= date_end)
    )

    # A single grouped sum replaces re-filtering the frame for every vessel
    # and every owner.
    totals = (
        catch_df.loc[in_scope]
        .groupby(['id_ves', 'id_own'], as_index=False)['catch_volume']
        .sum()
    )
    # Pairs without a positive total are not reported.
    totals = totals[totals['catch_volume'] > 0]

    return pd.DataFrame({
        'total_catches': totals['catch_volume'].to_numpy(),
        'ship': totals['id_ves'].to_numpy(),
        'owner': totals['id_own'].to_numpy(),
    })

In [173]:
def calculatingSummOfFishTransfering(fish_name, start, end):
    """List the transfer documents of one fish for owners found in both sources.

    Reads the module-level `ext1`, `ext2` and `market_owners`. Documents are
    matched between `ext2` and `ext1` through `id_vsd`.

    Args:
        fish_name: value compared for equality with `ext2['fish']`.
        start: not used; kept so the signature matches existing calls.
        end: not used; kept so the signature matches existing calls.

    Returns:
        pandas.DataFrame with one row per combination of an `ext2` record of
        the fish and an `ext1` record with the same `id_vsd` whose `id_own`
        is in `market_owners`. Columns:
            owner         - `id_own` of the `ext1` record;
            volume        - `volume` of the first `ext2` record with that
                            `id_vsd`;
            fishery_date  - characters 5..9 of `date_fishery` of the first
                            `ext1` record with that `id_vsd`;
            vsd_date      - characters 5..9 of `date_vsd` of the first `ext2`
                            record with that `id_vsd`.
        The slice `[5:10]` yields `MM-DD` for values shaped like
        `YYYY-MM-DD HH:MM:SS` (both date columns are assumed to be strings).
    """
    # One row per ext2 record of the requested fish.
    fish_docs = ext2.loc[ext2['fish'] == fish_name, ['id_vsd']]
    # ext1 records whose owner is present in both sources.
    owner_rows = ext1.loc[ext1['id_own'].isin(market_owners), ['id_vsd', 'id_own']]
    # Per document only the first record supplies the values.
    doc_values = ext2.drop_duplicates('id_vsd')[['id_vsd', 'volume', 'date_vsd']]
    doc_fishery = ext1.drop_duplicates('id_vsd')[['id_vsd', 'date_fishery']]

    # Joins replace a full scan of ext1 and ext2 for every single document.
    merged = (
        fish_docs.merge(owner_rows, on='id_vsd', how='inner')
        .merge(doc_values, on='id_vsd', how='left')
        .merge(doc_fishery, on='id_vsd', how='left')
    )

    # Each output column is filled from the source column of the same meaning
    # (`fishery_date` <- `date_fishery`, `vsd_date` <- `date_vsd`).
    df = pd.DataFrame({
        'owner': merged['id_own'],
        'volume': merged['volume'],
        'fishery_date': merged['date_fishery'].str[5:10],
        'vsd_date': merged['date_vsd'].str[5:10],
    })

    return df

### Поиск аномалий

#### Осуществим поиск, отбирая данные по ID рыбы и дате вылова (для простоты возьмём весь период, но можно взять любой промежуток)

In [44]:
# NOTE(review): the entered value is not read by any later cell visible in
# this notebook (they pass the fish name literally) - confirm it is needed.
fish_id = input('Введите ID рыбы\n')

# Default period: the earliest and the latest catch date. min()/max() are used
# instead of the first and last row so the period stays correct even when
# `catch_df` is not sorted by date or does not carry a default 0..n-1 index.
default_data = (catch_df['date'].min(), catch_df['date'].max())
start, end = default_data

#### Возьмём уникальный список владельцев судов

In [62]:
# Distinct owner ids seen in the catch records and in ext1.
catcher_owners = catch_df['id_own'].unique()
traider_owners = ext1['id_own'].unique()

# Owners present in both arrays, in the order they occur in `traider_owners`.
market_owners = [owner for owner in traider_owners if owner in catcher_owners]

In [61]:
# Sizes of the three owner collections: catch owners, ext1 owners, common owners.
print(*map(len, (catcher_owners, traider_owners, market_owners)))

350 159 121


#### Как видим, db2 не полностью покрывает всех владельцев, следовательно, будем использовать для поиска только тех, которые задействованы в обеих БД

In [167]:
# Transfers of 'минтай' for the default period computed above.
df = calculatingSummOfFishTransfering(fish_name='минтай', start=start, end=end)

Unnamed: 0,id_ves,date,id_region,id_fish,catch_volume,id_regime,permit,id_own,fish
0,4524386,2022-01-01,272,400,31874.00,41,852296,7115834,минтай
1,474886,2022-01-01,277,886,476.00,1,5401178,1809581,краб-стригун красный
2,3160994,2022-01-01,116,409,26310.00,15,3869431,5258490,путассу (северная)
3,1234400,2022-01-01,272,292,9541.00,1,2169057,1809581,треска
4,1234400,2022-01-01,272,113,62.00,1,2169057,1809581,палтус черный
...,...,...,...,...,...,...,...,...,...
85082,4992471,2022-04-20,108,410,1166.00,18,1518104,8379463,сайда
85083,7645033,2022-04-20,265,292,1345.00,97,1750243,1002633,треска
85084,5951077,2022-04-20,507,113,4.00,97,5099847,440996,палтус черный
85085,5292474,2022-04-20,206,147,505.05,97,229919,7829965,окунь золотистый


In [175]:
def getAllTransfers(id, start, end):
    """Compute the catch totals and the transfers of one fish and preview both.

    Args:
        id: forwarded as the `fish_name` argument of `calculatingSummOfFish`
            and `calculatingSummOfFishTransfering`. The name is kept for
            compatibility with existing calls.
        start: forwarded unchanged to both functions.
        end: forwarded unchanged to both functions.

    Returns:
        Tuple `(incoming, outcoming)` with the two frames, so the results can
        be used after the preview instead of being discarded. The first five
        rows of each frame are printed as before.
    """
    incoming = calculatingSummOfFish(id, start, end)
    outcoming = calculatingSummOfFishTransfering(id, start, end)

    print(incoming.head(5))
    print(outcoming.head(5))

    return incoming, outcoming

In [None]:
# Preview incoming catches and outgoing transfers of 'минтай' for the period.
getAllTransfers(id='минтай', start=start, end=end)

In [141]:
# Show the current contents of `ext1` as the cell output.
ext1

Unnamed: 0,id_fishery,id_own,date_fishery,numPart,id_Plat,id_vsd,Name_Plat,Product_period,Region_Plat
0,2208303,6493928,2022-05-24 00:00:00,\N,5022636,8745666,\N,\N,\N
1,5538439,3846064,2022-05-24 00:00:00,\N,839830,3846313,\N,\N,\N
2,8688535,3107158,2022-05-24 00:00:00,\N,9310751,5215676,\N,\N,\N
3,8688535,3107158,2022-05-24 00:00:00,\N,9310751,4008716,\N,\N,\N
4,9102536,6493928,2022-05-24 00:00:00,\N,2900344,8522784,\N,\N,\N
...,...,...,...,...,...,...,...,...,...
3260793,2140098,7372847,2020-12-30 00:00:00,\N,4176694,2113660,\N,\N,\N
3260794,3458281,7115834,2020-12-29 00:00:00,\N,906811,6111511,\N,\N,\N
3260795,3458281,7115834,2020-12-29 00:00:00,\N,906811,3138810,\N,\N,\N
3260796,3458281,7115834,2020-12-29 00:00:00,\N,906811,2081767,\N,\N,\N


In [101]:
# Show every `ext1` row whose `id_own` is 5502023.
ext1.loc[ext1['id_own'] == 5502023]

Unnamed: 0,id_fishery,id_own,date_fishery,numPart,id_Plat,id_vsd,Name_Plat,Product_period,Region_Plat
15313,-1,5502023,2022-04-20 02:38:01,\N,2293928,2915061,732210,\N,Камчатский край
15314,-1,5502023,2022-04-20 02:38:01,\N,2293928,9000427,732210,\N,Камчатский край
15315,-1,5502023,2022-04-20 02:37:57,\N,6724365,5761063,7610458,\N,Приморский край
15316,-1,5502023,2022-04-20 02:37:34,\N,6724365,2116583,7610458,\N,Приморский край
15317,-1,5502023,2022-04-20 02:37:04,\N,6724365,5968235,7610458,\N,Приморский край
...,...,...,...,...,...,...,...,...,...
3112990,-1,5502023,2022-01-02 00:02:35,\N,5370753,4066625,3959452,2022-01-02 00:00:00,г. Москва
3112991,-1,5502023,2022-01-02 00:02:35,\N,5370753,3325036,3959452,2022-01-02 00:00:00,г. Москва
3112992,-1,5502023,2022-01-02 00:02:35,\N,5370753,9145756,3959452,2022-01-02 00:00:00,г. Москва
3112993,-1,5502023,2022-01-02 00:02:19,\N,3354874,4471335,1137583,2022-01-02 00:00:00,Республика Хакасия


In [108]:
# Show the current contents of `ext2` as the cell output.
ext2

Unnamed: 0,id_vsd,num_vsd,id_fish,fish,date_vsd,volume,unit
0,7230988,8934601,400,минтай,2022-01-01 00:39:56,75480,\N
1,3420538,1439843,400,минтай,2022-01-01 01:34:08,62586,\N
2,9509257,342637,292,треска,2022-01-01 01:34:11,1155,\N
3,1894307,1801854,400,минтай,2022-01-01 01:34:05,111048,\N
4,9410489,3126516,88,окунь-клювач,2022-01-01 01:34:11,1844,\N
...,...,...,...,...,...,...,...
3115275,7436163,4038632,294,краб-стригун опилио,2022-05-25 12:54:52,774,\N
3115276,4519348,9520759,294,краб-стригун опилио,2022-05-25 12:57:18,5533,\N
3115277,1445226,7953654,292,треска,2022-05-25 12:19:02,8235,\N
3115278,7624369,1390453,408,пикша,2022-05-25 12:19:02,756,\N
