In [1]:
import datetime
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import phik
import scipy.stats as stats
import seaborn as sns
import statsmodels.api as sm
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'
sns.set_theme(context='talk', style='whitegrid', palette='deep')
plt.rcParams['figure.figsize'] = 10, 7
plt.rcParams['font.size'] = 20
plt.rcParams['axes.labelsize'] = 25
plt.rcParams['figure.titlesize'] = 32
plt.rcParams['axes.titlesize'] = 32
plt.rcParams['savefig.format'] = 'pdf'
plt.rcParams['figure.autolayout'] = 'true'
plt.rcParams['figure.frameon'] = 'false'
plt.rcParams['axes.spines.left'] = 'false'
plt.rcParams['axes.spines.right'] = 'false'
plt.rcParams['axes.spines.top'] = 'false'
plt.rcParams['legend.fancybox'] = 'false'
plt.rcParams['axes.spines.bottom'] = 'false'
plt.rcParams['font.size'] = 20
plt.rcParams['figure.facecolor'] = 'white'
plt.rcParams['axes.facecolor'] = 'white'
# для графиков, где надо много цветов, юзайте воть:
sns.set_palette(sns.color_palette('deep'))
# а по дефолту воть:
sns.set_palette(sns.color_palette('BuGn_r', n_colors=10)[2::3])
pd.set_option('display.max_columns', 60)

In [2]:
feeding_details_22 = pd.read_csv('datasets/2022-feeding-tasks-details.csv', on_bad_lines='skip')
feeding_22 = pd.read_csv('datasets/2022-feeding-tasks.csv', on_bad_lines='skip')

feeding_details_23 = pd.read_csv('datasets/2023-feeding-tasks-details.csv', on_bad_lines='skip')
feeding_23 = pd.read_csv('datasets/2023-feeding-tasks.csv', on_bad_lines='skip')

feeding_details_24 = pd.read_csv('datasets/2024-feeding-tasks-details.csv', on_bad_lines='skip')
feeding_24 = pd.read_csv('datasets/2024-feeding-tasks.csv', on_bad_lines='skip')

feeding_details_25 = pd.read_csv('datasets/2025-feeding-tasks-details.csv', on_bad_lines='skip')
feeding_25 = pd.read_csv('datasets/2025-feeding-tasks.csv', on_bad_lines='skip')

In [3]:
monthly_feeding = pd.read_excel('datasets/Ekoniva_dataset.xlsx', sheet_name='Feeding')
herd_metrics = pd.read_excel('datasets/Ekoniva_dataset.xlsx', sheet_name='Herd maintenance').replace('-', np.nan)
production_indicators = pd.read_excel('datasets/Ekoniva_dataset.xlsx', sheet_name='Dairy indicators').replace('-', np.nan)

In [4]:
def merge_tables() -> pd.DataFrame:
    """
    Merges all feeding dataframes with all details dataframes
    returns: merged dataframe
    rtype: pd.DataFrame
    """
    feeding_22["year"] = "2022"
    feeding_23["year"] = "2023"
    feeding_24["year"] = "2024"
    feeding_25["year"] = "2025"
    feeding_details_22["year"] = "2022"
    feeding_details_23["year"] = "2023"
    feeding_details_24["year"] = "2024"
    feeding_details_25["year"] = "2025"

    feeding_all = pd.concat([feeding_22, feeding_23, feeding_24, feeding_25], ignore_index=True)
    feeding_details_all = pd.concat([feeding_details_22, feeding_details_23, feeding_details_24, feeding_details_25], ignore_index=True)
    
    return feeding_all.merge(feeding_details_all, on=["FeedingTaskID", "SectionID", "year", "PhysiologicalGroupID", "PhysiologicalGroupName"], how="left"), feeding_all, feeding_details_all

feeding_and_details, feeding_all, feeding_details_all = merge_tables()


In [5]:
feeding_details_all[feeding_details_all['IngredientName'].isin(["01.01", "02.01", "03.05", "12.01"])]


Unnamed: 0,FeedingTaskID,SectionID,PhysiologicalGroupID,PhysiologicalGroupName,IngredientID,IngredientName,IngredientType,PhysicalWeight_kg,year
3127222,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,3,4,Д1,68,12.01,Forage,1816.000,2023
3127223,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,3,4,Д1,70,03.05,Forage,1844.000,2023
3127236,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,4,4,Д1,68,12.01,Forage,1924.000,2023
3127237,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,4,4,Д1,70,03.05,Forage,1913.000,2023
3127250,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,1,4,Д1,68,12.01,Forage,1205.000,2023
...,...,...,...,...,...,...,...,...,...
4083946,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,1,4,Д1,70,03.05,Forage,613.772,2023
4083960,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,2,4,Д1,134,02.01,Forage,298.588,2023
4083961,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,2,4,Д1,70,03.05,Forage,613.316,2023
4083975,Farms/EkoNiva1C.0db18646-1ce2-11ea-bbbf-b88303...,7,4,Д1,134,02.01,Forage,290.602,2023


In [6]:
import re
from collections import Counter
GROUP_KEYWORDS = {
    # Основной корм/кормовая база (фураж) — клетчатка, объём рациона
    "forage_bulk": [
        "силос", "сенаж", "сено", "солома", "soloma", "зел", "cилос",
        "люцерн", "клевер", "сорго", "рожь", "карнаж", "корнаж", "с-ж", "-с"
    ],

    # Энергетические компоненты — зерно, крахмал, меласса, 
    "energy_source": [
        "кукуруз", "ячмень", "овёс", "овес", "мука", "тритикале", "пшени",
        "плющ", "глютен", "концентрат", "патока", "зеренопродукт", "зерно", "7702.02.05.07", "7702.01.05.07",
    ],

    # Протеиновые компоненты
    "protein_source": [
        "соя", "горох", "шрот", "ш.соев", "шр подсолнеч", "жмых", "рапсов", "льнян", "бобы кормовые", "чечевиц", 
        "оболочк"
    ],

    # Жиры/защищённые жиры
    "fat_oils": [
        "масло", "жир", "лен"
    ],

    # Дрожжи / пробиотические добавки (живые дрожжи, пивные дрожжи, закваски)
    "yeast_products": [
        "дрож", "шаумацид"
    ],

    # Кормовые добавки: пробиотики, ферменты, сорбенты, консерванты — поддержка рубца, профилактика
    "feed_additives_probiotics": [
        "аквасейф", "мегабуст", "холин", "пробиот", "фермент", "сорбент", "консерван",
        "минвит", "биотек", "бвмк"
    ],

    # Комплексные премиксы/смеси/комбикорма — для молодых животных, концентраты
    "compound_feed_calf_replacements": [
        "стартер", "престартер", "бустер милк", "кормосмесь",
        "комбикорм", "кк", "kk"
    ],

    # Премиксы / специализированные добавки (разные градации премиксов)
    "premix_feed": [
        "премикс", "предсмесь", "пр.дойный"
    ],

    # Побочные продукты / отбросы пищевой промышленности
    "byproducts": [
        "дробина", "зерноотход", "остатк", "oстатки", "барда", "сухая стружка", "жом"
    ],

    # Влажные сырьё / источники влаги (влияют на консистенцию и санитарное состояние)
    "wet_ingredients_water": [
        "вода", "молоко", "зцм", "зск", "заменитель", "заменитель молока", "сыворотк", "лед"
    ],

    # Минералы / соли / буферы / известковое/фосфатные добавки
    "minerals_buffers": [
        "соль", "мел", "известняков", "известная мука", "мука известняковая", "монокальций фосфат",
        "галит", "поташ", "пропионат", "кисол"
    ],

    # Ветпрепараты / медикаменты (требуют учёта и контроля; не классифицируем как корм)
    "medications": [
        "ампролиум", "провилит", "электролит", "сода", "кальвобустер", "глицерин", "soda",
        "пропиленгликоль"
    ]
}

def group(feeding_and_details):
    def normalize_name(s):
        if pd.isna(s):
            return ""
        s = str(s).strip()
        s = s.replace("//", "/")
        s = re.sub(r"\s+", " ", s)
        s = s.lower()
        return s
    feeding_and_details['norm_ingr_name'] = feeding_and_details['IngredientName'].apply(normalize_name)
    code_re = re.compile(r'^\d+(?:\.\d+)+$') #Честно сам писал (100%)
    feeding_and_details['is_code'] = feeding_and_details['norm_ingr_name'].str.match(code_re)
    smth = pd.ExcelFile("datasets/Ekoniva_dataset.xlsx")
    nsi = smth.sheet_names[0]
    cultures = pd.read_excel(
        smth,
        sheet_name=nsi,
        usecols="B:C",
        skiprows=81,
        nrows=40 
    )
    cultures.columns = ["code", "name"]
    razdels = pd.read_excel(
        smth,
        sheet_name=nsi,
        usecols="E:H",
        skiprows=81,
        nrows=88  
    )
    razdels.columns = ["code", "region", "prop_farm_name", "farm_name"]
    feed_type = pd.read_excel(
        smth,
        sheet_name=nsi,
        usecols="B:C",
        skiprows=123,
        nrows=10
    )
    feed_type.columns = ["code", "feed_name"]
    cultures_map = cultures.set_index('code')['name'].to_dict()
    feed_type_map = feed_type.set_index('code')['feed_name'].to_dict()
    def decode_ingr_code(code):
        if pd.isna(code) or not isinstance(code, str):
            return code
        parts = code.split('.')
        if len(parts) < 4:
            return code
        try:
            culture_code = float(parts[2])
            feed_code = float(parts[3])
        except ValueError:
            return code    
        culture_name = cultures_map.get(culture_code, str(culture_code))
        feed_name = feed_type_map.get(feed_code, str(feed_code))
        return f"{culture_name} {feed_name}".lower()
    feeding_and_details['decoded_name'] = feeding_and_details.apply(
        lambda row: decode_ingr_code(row['norm_ingr_name']) if row['is_code'] else row['norm_ingr_name'],
        axis=1
    )



    def classify_ingredient(name):
        if not isinstance(name, str):
            return "other"
        name_lower = name.lower()
        for group, kat_list in GROUP_KEYWORDS.items():
            for kat in kat_list:
                if kat in name_lower:
                    return group
        return "other"
    feeding_and_details['ingredient_group'] = feeding_and_details['decoded_name'].apply(classify_ingredient)

    return feeding_and_details[feeding_and_details["ingredient_group"] != "other"]

feeding_and_details = group(feeding_and_details)


In [27]:
feeding_and_details[feeding_and_details["ingredient_group"] == "other"]["decoded_name"].unique()

array(['12.01', '03.05', '01.01', '02.01', 'oстатки', ''], dtype=object)

Тест на каузальность грейнджера

In [22]:
feeding_and_details.ingredient_group.value_counts()

ingredient_group
forage_bulk                        6303915
energy_source                      3296768
protein_source                     2659625
compound_feed_calf_replacements     982387
wet_ingredients_water               978807
premix_feed                         857876
minerals_buffers                    480877
byproducts                          246254
fat_oils                            236806
medications                         217356
feed_additives_probiotics            75606
yeast_products                       71722
other                                 3366
Name: count, dtype: int64

In [22]:
feeding_and_details[feeding_and_details["ingredient_group"] == "byproducts"]["decoded_name"].unique()

array(['дробина', 'дробина сырая', 'остатки', 'остатки д3',
       'дробина сухая', 'пивная дробина', 'остатки 2-5', 'остатки мол.',
       'жом свекловичный сухой', 'жом свекловичный', 'барда сухая',
       'жом свекловичный свежий', 'жом свекловичный сушенный',
       'жом сухой', 'остатки д1', 'жом', 'дробина сухая гранулированная',
       'дробина пивная', 'остатки т1', 'остаткти', 'остатки 3-5',
       'жом свекловичный сырой', 'oстатки', 'остатки летник',
       'жом свекольный сухой'], dtype=object)

In [42]:
feeding_and_details[feeding_and_details["ingredient_group"] == "other"]["decoded_name"].unique()

array(['12.01', '03.05', '01.01', '02.01', ''], dtype=object)

In [20]:
feeding_and_details[(feeding_and_details["FeedingTaskID"] == "Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec18fdc6/92630") & (feeding_and_details["PhysiologicalGroupName"] == "Нетели")]

Unnamed: 0,FeedingTaskID,Date,FarmName,FeedNumber,SectionID,PhysiologicalGroupID,PhysiologicalGroupName,PhysiologicalGroupHeadCount,Appetite,RationName,RationPart,TotalWeight_kg,CompletedAt,year,IngredientID,IngredientName,IngredientType,PhysicalWeight_kg,norm_ingr_name,is_code,decoded_name,ingredient_group
247,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,24,90,53,Нетели,63,1.0,С1,1.0,2228.0,2022-01-01T09:47:00.0000000,2022,796.0,Сено луговое,Forage,374.586,сено луговое,False,сено луговое,forage_bulk
248,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,24,90,53,Нетели,63,1.0,С1,1.0,2228.0,2022-01-01T09:47:00.0000000,2022,153.0,2262.03.01.01.1.21,Forage,691.543,2262.03.01.01.1.21,True,люцерна сенаж,forage_bulk
249,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,24,90,53,Нетели,63,1.0,С1,1.0,2228.0,2022-01-01T09:47:00.0000000,2022,8.0,Соль,VitaminMineral,2.618,соль,False,соль,minerals_buffers
250,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,24,90,53,Нетели,63,1.0,С1,1.0,2228.0,2022-01-01T09:47:00.0000000,2022,78.0,Премикс Дойный,VitaminMineral,9.819,премикс дойный,False,премикс дойный,premix_feed
251,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,24,90,53,Нетели,63,1.0,С1,1.0,2228.0,2022-01-01T09:47:00.0000000,2022,152.0,2262.01.05.02.1.21,Forage,540.858,2262.01.05.02.1.21,True,кукуруза силос,forage_bulk


In [None]:
"кукуруз"

False

In [12]:
feeding_and_details[
    feeding_and_details["IngredientName"]
        .str.lower()
        .str.contains("дрожжи", na=False)
]


Unnamed: 0,FeedingTaskID,Date,FarmName,FeedNumber,SectionID,PhysiologicalGroupID,PhysiologicalGroupName,PhysiologicalGroupHeadCount,Appetite,RationName,RationPart,TotalWeight_kg,CompletedAt,year,IngredientID,IngredientName,IngredientType,PhysicalWeight_kg
42,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,37,99,58,Т3 (9-12 мес.),304,1.0,Т3 9-12,1.0,4360.0,2022-01-01T10:45:00.0000000,2022,145.0,Дрожжи кормовые,Concentrate,216.796
49,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,38,105,58,Т3 (9-12 мес.),157,1.0,Т3 9-12,1.0,2252.0,2022-01-01T10:50:00.0000000,2022,145.0,Дрожжи кормовые,Concentrate,111.204
161,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,6,38,56,Т1 (3-5 мес.),417,1.4,Т1 3-5,1.0,3712.0,2022-01-01T06:11:00.0000000,2022,145.0,Дрожжи кормовые,Concentrate,296.169
166,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,9,93,56,Т1 (3-5 мес.),364,1.4,Т1 3-5,1.0,3125.0,2022-01-01T06:15:00.0000000,2022,145.0,Дрожжи кормовые,Concentrate,237.831
221,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,2022-01-01,ЖК Добрино,19,58,59,Т4 (>1 года),129,1.1,Т4 13-20,1.0,2496.0,2022-01-01T07:35:00.0000000,2022,145.0,Дрожжи кормовые,Concentrate,117.723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16410136,Farms/EkoNiva1C.9d4645bb-570c-11e2-9cc0-00155d...,2025-10-22,ЖК Плеханово,22,136,10,С2,70,1.3,С2,1.0,3439.0,2025-10-22T07:34:00.0000000,2025,463.0,Дрожжи кормовые,Concentrate,27.519
16410145,Farms/EkoNiva1C.9d4645bb-570c-11e2-9cc0-00155d...,2025-10-22,ЖК Плеханово,23,212,16,Нетели,33,1.2,С2,1.0,2582.0,2025-10-22T07:35:00.0000000,2025,463.0,Дрожжи кормовые,Concentrate,11.587
16410154,Farms/EkoNiva1C.9d4645bb-570c-11e2-9cc0-00155d...,2025-10-22,ЖК Плеханово,23,212,10,С2,24,1.2,С2,1.0,2582.0,2025-10-22T07:35:00.0000000,2025,463.0,Дрожжи кормовые,Concentrate,8.427
16411317,Farms/EkoNiva1C.6c69fe1b-2b6f-11e8-80c4-1c98ec...,2025-10-22,ЖК Подболотье,15,2,3,Д1,147,1.1,Д1,0.6,5732.0,2025-10-22T08:28:57.1278426,2025,271.0,Дрожжи кормовые,VitaminMineral,30.692


In [7]:
df = feeding_and_details.copy()

grouped = df.groupby(
    ["Date", "FarmName", "PhysiologicalGroupName", "ingredient_group"]
)["PhysicalWeight_kg"].sum().reset_index()

planned = df[
    ["Date", "FarmName", "PhysiologicalGroupName", "Appetite", "PhysiologicalGroupHeadCount"]
].drop_duplicates()

appetites = planned.groupby(
    ["Date", "FarmName", "PhysiologicalGroupName"]
).agg({
    "Appetite": "mean",
    "PhysiologicalGroupHeadCount": "sum"
}).reset_index().rename(columns={"PhysiologicalGroupHeadCount": "CowCount"})

date_phy_feeding = pd.merge(
    grouped,
    appetites,
    on=["Date", "FarmName", "PhysiologicalGroupName"],
    how="left"
)
date_phy_feeding["WeightPerCow"] = date_phy_feeding["PhysicalWeight_kg"] / date_phy_feeding["CowCount"]
date_phy_feeding.to_csv("out1.csv")

In [11]:
others = feeding_details_all[feeding_details_all['ingredient_group'] == 'other']
others['decoded_name'].value_counts()

KeyError: 'ingredient_group'

In [None]:
others[others["decoded_name"] == ""]

Unnamed: 0,FeedingTaskID,SectionID,PhysiologicalGroupID,PhysiologicalGroupName,IngredientID,IngredientName,IngredientType,PhysicalWeight_kg,year,norm_ingr_name,is_code,decoded_name,ingredient_group
11574693,Farms/EkoNiva1C.9d4645a7-570c-11e2-9cc0-00155d...,342,54,Т1 (3-5 мес.),260,,Premixture,0.0,2024,,False,,other
11592293,Farms/EkoNiva1C.9d4645a7-570c-11e2-9cc0-00155d...,342,54,Т1 (3-5 мес.),260,,Premixture,0.0,2024,,False,,other
11608364,Farms/EkoNiva1C.9d4645a7-570c-11e2-9cc0-00155d...,342,54,Т1 (3-5 мес.),260,,Premixture,0.0,2024,,False,,other
11622461,Farms/EkoNiva1C.9d4645a7-570c-11e2-9cc0-00155d...,342,54,Т1 (3-5 мес.),260,,Premixture,0.0,2024,,False,,other


In [None]:
feeding_details_all

Unnamed: 0,FeedingTaskID,SectionID,PhysiologicalGroupID,PhysiologicalGroupName,IngredientID,IngredientName,IngredientType,PhysicalWeight_kg,year,norm_ingr_name,is_code,decoded_name,ingredient_group
0,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,9,2,Д1,125,Солома покупная,Forage,150.259,2022,солома покупная,False,солома покупная,forage
1,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,9,2,Д1,773,3645.01.01.01.1.20,Forage,880.444,2022,3645.01.01.01.1.20,True,люцерна сенаж,forage
2,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,9,2,Д1,72,Комбикорм 10 группы,Concentrate,542.257,2022,комбикорм 10 группы,False,комбикорм 10 группы,premix_blend
3,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,9,2,Д1,82,Кукуруза сухая,Concentrate,991.793,2022,кукуруза сухая,False,кукуруза сухая,energy
4,Farms/EkoNiva1C.216d4235-2852-11e8-80c4-1c98ec...,9,2,Д1,129,Шрот подсолнечный,Concentrate,156.054,2022,шрот подсолнечный,False,шрот подсолнечный,protein
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16443074,Farms/EkoNiva1C.6c69fe1b-2b6f-11e8-80c4-1c98ec...,131,1,Нетели,119,Солома (общ.),Forage,120.643,2025,солома (общ.),False,солома (общ.),forage
16443075,Farms/EkoNiva1C.6c69fe1b-2b6f-11e8-80c4-1c98ec...,131,1,Нетели,335,6203.01.01.01.1.25,Forage,549.169,2025,6203.01.01.01.1.25,True,люцерна сенаж,forage
16443076,Farms/EkoNiva1C.6c69fe1b-2b6f-11e8-80c4-1c98ec...,131,1,Нетели,331,Жом свекловичный сухой,Concentrate,36.676,2025,жом свекловичный сухой,False,жом свекловичный сухой,byproduct
16443077,Farms/EkoNiva1C.6c69fe1b-2b6f-11e8-80c4-1c98ec...,131,1,Нетели,143,Премикс молодняк 6-24,VitaminMineral,10.049,2025,премикс молодняк 6-24,False,премикс молодняк 6-24,premix


In [None]:
feeding_details_all = feeding_details_all[feeding_details_all["ingredient_group"] != "other"]

In [None]:
feeding_details_all.to_csv("out.csv")