In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from itertools import product
import requests
from bs4 import BeautifulSoup
import os

### PRODUCTS EXPIRATION DATA

In [2]:
# products and their expiration dates
expiration = {
    2: [
        '01. Сэндвич "Классический"', 
        '02. Сэндвич "С курицей"',
        '03. Сэндвич Сырный Соус',
        '04. Сэндвич "PICNIC"',
        '06. Хот - Дог',
        '08. Гамбургер с котлетой',
        '29. Burrito стрипсы'
        ],

    3: [
        '16. Багет пшенично-ржаной',
        '15. Багет пшеничный',
        '21. Тостовый хлеб',
        '23. Батон нарезной',
        '43. Батон к чаю',
        '44. Батон отрубной',
        "45. Батон ''Царский''"
        ],

    5: [
        '10. Булочка Ярославка',
        '59. Булочки для фуда БУРГЕР',
        '60. Булочки для фуда Хот-Дог',
        '18. Лаваш',
        '17. Ролл',
        "42. Батон ''Живая Рожь''"
        ]
}

### ADD NEW DATA TO MAIN DATASET

In [3]:
def add_new_data_to_initial_dataset():
    # reading data
    initial_DF = pd.read_csv('dataset/initial_dataset.csv')
    print(initial_DF.shape)
    temp_df = pd.read_excel('temp/data.xlsx', skipfooter=3)
    
    # renaming columns
    temp_df.rename({
        'Дата.1': 'Date',
        'Маршрут': 'District',
        'Контрагент': 'Address',
        'Номенклатура': 'Product',
        'Количество': 'Sales',
        'Обмен': 'Returns'
    }, axis=1, inplace=True)
    
    # concating new and old data
    df = pd.concat([initial_DF, temp_df])
    
    # converting a date columns to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    
    # sorting data by date
    df = df.sort_values('Date').reset_index(drop=True).drop_duplicates()
    
    # deleting unnecessary data according to date limit
    date_limit = df['Date'].iloc[-1] - timedelta(weeks=5)
    df = df[df['Date'] >= date_limit]
    
    print(df.shape)
    df.to_csv('dataset/initial_dataset.csv', index=False)

### SHIFTING RETURNS

In [4]:
def shifting_returns_and_merging_with_sales(initial_DF, days_to_shift):
    if days_to_shift not in [2, 3, 5]:
        return
    
    # reading data
    initial_DF['Date'] = pd.to_datetime(initial_DF['Date'])
    
    # filtering data by products
    df = initial_DF[initial_DF['Product'].isin(expiration[days_to_shift])]
    
    # seperating sales and returns
    df_sales = df.drop(['Returns'], axis=1)
    df_returns = df.drop(['Sales'], axis=1)
    
    # shifting date 
    df_returns['Date'] = df_returns['Date'] - timedelta(days=days_to_shift)
    
    # getting date range of a final df
    date_range = df_sales.merge(
        df_returns,
        on=['Date', 'District', 'Address', 'Product'],
        how='inner'
    )['Date'].unique()
    
    # merging sales and returns
    main_DF = df_sales.merge(
        df_returns,
        on=['Date', 'District', 'Address', 'Product'],
        how='outer'
    )
    
    # final data
    main_DF = main_DF[main_DF['Date'].isin(date_range)].fillna(0)

    return main_DF

In [5]:
# getting 3 seperate DFs of 3 groups of products with shifted returns
initial_DF = pd.read_csv('dataset/initial_dataset.csv')

two_days_expiricy_DF = shifting_returns_and_merging_with_sales(initial_DF, days_to_shift=2)
three_days_expiricy_DF = shifting_returns_and_merging_with_sales(initial_DF, days_to_shift=3)
five_days_expiricy_DF = shifting_returns_and_merging_with_sales(initial_DF, days_to_shift=5)

### FILL MISSED ADDRESSES

In [6]:
combined_DF = pd.concat([two_days_expiricy_DF, three_days_expiricy_DF, five_days_expiricy_DF])
combined_DF.sort_values('Date', inplace=True)

In [7]:
limit_date_to_get_data_about_stores = combined_DF['Date'].iloc[-1] - timedelta(weeks=2)
condition = (combined_DF['Date'] >= limit_date_to_get_data_about_stores)

In [8]:
all_addresses = combined_DF[condition]['Address'].unique()
all_products = combined_DF[condition]['Product'].unique()

len(all_addresses), len(all_products)

(2399, 20)

In [9]:
start_date = combined_DF.iloc[0]['Date']
end_date = combined_DF.iloc[-1]['Date'] + timedelta(weeks=1)

In [10]:
data = []

# Наполняем список комбиниациями
current_date = start_date
while current_date <= end_date:
    combinations = list(product(all_products, all_addresses))
    data.extend([(current_date, address, product) for product, address in combinations])
    current_date += timedelta(days=1)
    
full_DF = pd.DataFrame(data, columns=["Date", "Address", "Product"])

In [11]:
# Добавляем колонну продаж и возвратов
full_DF['Sales'] = 0
full_DF['Returns'] = 0

# Далем слияние двух таблиц, чтобы продажи и возвраты встали на место.
main_DF = full_DF[['Date', 'Address', 'Product']].merge(
    combined_DF[['Date', 'Address', 'Product', 'Sales', 'Returns']], 
    on=['Date', 'Address', 'Product'], 
    how='left'
)

# Заполняем 'null' нулями
main_DF = main_DF.fillna(0)

### DELETING ADDRESS-PRODUCT COMBINATIONS THAT ARE NOT RELEVANT

In [12]:
limit_date_to_get_data_about_store_sales = main_DF.sort_values('Date')['Date'].iloc[-1] - timedelta(weeks=2)
condition = (main_DF['Date'] >= limit_date_to_get_data_about_store_sales)

In [13]:
grouped_add_prod_sales_DF = main_DF[condition].groupby(['Address', 'Product']).agg({
    'Sales': 'sum'
}).reset_index()

In [14]:
existing_address_product_combinations_DF = grouped_add_prod_sales_DF[grouped_add_prod_sales_DF['Sales'] > 0].drop('Sales', axis=1)

In [15]:
existing_address_product_combinations_DF.shape

(27245, 2)

In [16]:
main_DF = main_DF.merge(existing_address_product_combinations_DF,
             on=['Address', 'Product'])

### REMOVE SUNDAYS

In [17]:
main_DF.shape

(1117489, 5)

In [18]:
main_DF['Weekday'] = main_DF['Date'].dt.weekday
main_DF = main_DF[main_DF['Weekday'] != 6]

In [19]:
main_DF.shape

(954019, 6)

### SHIFTING SALES AND RETURNS

In [20]:
main_DF.sort_values(by=['Address', 'Product', 'Date'], inplace=True)

In [21]:
days_to_shift = [1, 2, 3, 4, 5, 6, 13, 20]
for day in days_to_shift:
    main_DF[f'Sales_{day + 1}'] = main_DF.groupby(['Address', 'Product'])['Sales'].shift(day)
    main_DF[f'Returns_{day + 1}'] = main_DF.groupby(['Address', 'Product'])['Returns'].shift(day)

In [22]:
main_DF.fillna(0, inplace=True)

In [23]:
main_DF.groupby(['Date'])['Sales', 'Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_14', 'Sales_21'].sum()

  main_DF.groupby(['Date'])['Sales', 'Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_14', 'Sales_21'].sum()


Unnamed: 0_level_0,Sales,Sales_2,Sales_3,Sales_4,Sales_5,Sales_6,Sales_14,Sales_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-07-03,46474.0,127.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-07-04,41634.0,46427.0,168.0,117.0,0.0,0.0,0.0,0.0
2024-07-05,9126.0,41562.0,46416.0,74.0,134.0,0.0,0.0,0.0
2024-07-06,58984.0,9316.0,41567.0,46501.0,164.0,189.0,0.0,0.0
2024-07-08,46371.0,58967.0,9458.0,41663.0,46430.0,230.0,0.0,0.0
2024-07-09,48422.0,46272.0,58874.0,9391.0,41796.0,46487.0,0.0,0.0
2024-07-10,50521.0,48599.0,46346.0,58834.0,9342.0,41755.0,0.0,0.0
2024-07-11,42503.0,50288.0,48482.0,46467.0,58854.0,9293.0,159.0,0.0
2024-07-12,9012.0,42483.0,50255.0,48285.0,46359.0,58807.0,124.0,0.0
2024-07-13,57823.0,9241.0,42525.0,50354.0,48285.0,46382.0,140.0,0.0


### STATISTICS

In [24]:
%%time
main_DF['Returns_percentage'] = main_DF['Returns_7'] / (main_DF['Sales_7'] + main_DF['Returns_7'])

main_DF['Mean_sales_in_week'] = main_DF[['Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_7']].mean(axis=1)
main_DF['Median_sales_in_week'] = main_DF[['Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_7']].median(axis=1)

main_DF['Mean_sales_in_3_weeks'] = main_DF[['Sales_7', 'Sales_14', 'Sales_21']].mean(axis=1)
main_DF['Median_sales_in_3_weeks'] = main_DF[['Sales_7', 'Sales_14', 'Sales_21']].median(axis=1)

main_DF['No_sales_in_week'] = main_DF[['Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_7']].sum(axis=1) == 0
main_DF['No_returns_in_week'] = main_DF[['Returns_2', 'Returns_3', 'Returns_4', 'Returns_5', 'Returns_6', 'Returns_7',]].sum(axis=1) == 0

main_DF['Total_sales_in_week'] = main_DF[['Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_7']].sum(axis=1)

main_DF['Mean_returns_in_week'] = main_DF[['Returns_2', 'Returns_3', 'Returns_4', 'Returns_5', 'Returns_6', 'Returns_7',]].mean(axis=1)
main_DF['Mean_returns_in_3_weeks'] = main_DF[['Returns_7', 'Sales_14', 'Returns_21']].mean(axis=1)

CPU times: total: 1.59 s
Wall time: 1.64 s


### SET BACK DISTRICTS

In [25]:
district_address_DF = combined_DF.sort_values('Date')[['District', 'Address']].drop_duplicates()

In [26]:
district_address_DF = district_address_DF.drop_duplicates('Address', keep='last')

In [27]:
district_address_DF['Address'].unique().shape, district_address_DF.shape

((2458,), (2458, 2))

In [28]:
main_DF = main_DF.merge(district_address_DF, on='Address')

In [29]:
columns_order = list(main_DF.columns)
columns_order.insert(1, columns_order.pop())

In [30]:
main_DF = main_DF[columns_order]

### HANDLE SPECIAL ORDERS AND ANOMALIES

In [31]:
def isAnomaly(data):
    # Calculate mean and standard deviation
    mean = np.mean(data)
    std_dev = np.std(data)

    # Calculate quartiles and IQR
    Q1 = np.percentile(data, 25)
    Q3 = np.percentile(data, 75)
    IQR = Q3 - Q1

    # Calculate bounds for outliers using IQR
    const = 2
    lower_bound = Q1 - const * IQR
    upper_bound = Q3 + const * IQR

    iqr_anomalies = [x for x in data if x < lower_bound or x > upper_bound]
    
    return data['Sales_7'] in iqr_anomalies

In [32]:
%%time
main_DF['isAnomaly'] = False

condition = main_DF['Sales_7'] > 10
columns_to_check = ['Sales_2', 'Sales_3', 'Sales_4', 'Sales_5', 'Sales_6', 'Sales_7']


main_DF.loc[condition,'isAnomaly'] = main_DF[condition][columns_to_check].apply(isAnomaly, axis=1)

CPU times: total: 4.75 s
Wall time: 4.83 s


In [33]:
main_DF['Address'].unique().shape

(2370,)

In [34]:
main_DF[main_DF['isAnomaly'] == True]['Address'].unique().shape

(1122,)

In [35]:
main_DF['isAnomaly'].value_counts()

False    951303
True       2716
Name: isAnomaly, dtype: int64

### PREDICT DATAFRAME

In [36]:
limit_date_to_predict = main_DF.sort_values('Date')['Date'].iloc[-1] - timedelta(days=5)
condition = (main_DF['Date'] == limit_date_to_predict)

predict_DF = main_DF[condition].drop(['Sales', 'Returns'], axis=1)
predict_DF['Predict'] = predict_DF['Sales_7']
predict_DF['Delivered_7'] = predict_DF['Sales_7'] + predict_DF['Returns_7']

predict_DF

Unnamed: 0,Date,District,Address,Product,Weekday,Sales_2,Returns_2,Sales_3,Returns_3,Sales_4,...,Median_sales_in_week,Mean_sales_in_3_weeks,Median_sales_in_3_weeks,No_sales_in_week,No_returns_in_week,Total_sales_in_week,Mean_returns_in_week,Mean_returns_in_3_weeks,isAnomaly,Predict
30,2024-08-07,46 маршрут,"ТОО ""SALEKZ"" ул.Мухамедханова д.19","01. Сэндвич ""Классический""",2,0.0,0.0,3.0,0.0,3.0,...,1.5,1.666667,2.0,False,True,9.0,0.000000,0.000000,False,3.0
65,2024-08-07,46 маршрут,"ТОО ""SALEKZ"" ул.Мухамедханова д.19","02. Сэндвич ""С курицей""",2,0.0,0.0,5.0,1.0,5.0,...,2.5,3.333333,5.0,False,False,15.0,0.166667,0.000000,False,5.0
100,2024-08-07,46 маршрут,"ТОО ""SALEKZ"" ул.Мухамедханова д.19",03. Сэндвич Сырный Соус,2,0.0,0.0,5.0,0.0,4.0,...,2.0,3.333333,5.0,False,False,14.0,0.166667,0.000000,False,5.0
135,2024-08-07,46 маршрут,"ТОО ""SALEKZ"" ул.Мухамедханова д.19","04. Сэндвич ""PICNIC""",2,0.0,0.0,3.0,2.0,3.0,...,1.5,1.666667,2.0,False,False,9.0,0.333333,0.000000,False,3.0
170,2024-08-07,46 маршрут,"ТОО ""SALEKZ"" ул.Мухамедханова д.19",06. Хот - Дог,2,0.0,0.0,0.0,1.0,1.0,...,0.0,1.333333,2.0,False,False,3.0,0.333333,0.000000,False,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953874,2024-08-07,50 маршрут,"№1, ул.Туркестан 10","04. Сэндвич ""PICNIC""",2,0.0,0.0,5.0,0.0,5.0,...,2.5,5.000000,5.0,False,True,15.0,0.000000,2.666667,False,5.0
953909,2024-08-07,50 маршрут,"№1, ул.Туркестан 10",06. Хот - Дог,2,0.0,0.0,3.0,0.0,3.0,...,0.0,0.000000,0.0,False,True,6.0,0.000000,0.000000,False,0.0
953944,2024-08-07,50 маршрут,"№1, ул.Туркестан 10",08. Гамбургер с котлетой,2,0.0,0.0,3.0,0.0,3.0,...,0.0,0.000000,0.0,False,True,6.0,0.000000,0.000000,False,0.0
953979,2024-08-07,50 маршрут,"№1, ул.Туркестан 10",18. Лаваш,2,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,False,True,10.0,0.000000,0.000000,False,0.0


### TREND AND WEATHER FUNCTIONS

In [37]:
def calculate_trend(data):
    y1, y2, y3 = data[0], data[1], data[2]
    trend = ((y3 - y2) + (y2 - y1)) / 2
    
    return trend

In [38]:
def parse_weather_forecast():
    HEADERS = {
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Mobile Safari/537.36",
        "accept": "*/*"
    }

    URL = "https://www.gismeteo.kz/weather-astana-5164/10-days/"
    r = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(r.text, 'html.parser')

    date_range = pd.date_range(start=datetime.now().date(), end=(datetime.now() + timedelta(days=9)).date())

    temperature_container = soup.find("div", class_="widget-row-chart widget-row-chart-temperature-air row-with-caption")
    temperature_elements = temperature_container.find_all("temperature-value") # type: ignore
    temperature_values = [temp['value'] for temp in temperature_elements if 'value' in temp.attrs]
    temperature_values = [int(temp) for temp in temperature_values][0::2]

    wind_speed_container = soup.find("div", class_="widget-row widget-row-wind row-wind-gust row-with-caption")
    wind_speed_elements = wind_speed_container.find_all("speed-value") # type: ignore
    wind_speed_values = [temp['value'] for temp in wind_speed_elements if 'value' in temp.attrs]
    wind_speed_values = [int(temp) for temp in wind_speed_values]

    humidity_container = soup.find("div", class_="widget-row widget-row-precipitation-bars row-with-caption")
    humidity_elements = humidity_container.find_all("div", class_= "row-item")

    humidity_values_uncleaned = [item.text for item in humidity_elements]

    humidity_values = []
    for temp in humidity_values_uncleaned:
        value_str = temp.strip().replace(',', '.')

        if value_str:
            humidity_values.append(float(value_str))

    weather_DF = pd.DataFrame({
        'Date': date_range,
        'Temperature': temperature_values,
        'Wind_speed': wind_speed_values,
        'Humidity': humidity_values
    })
    
    return weather_DF

In [39]:
def parse_weather_past():
    weather_past_DF = pd.read_excel(f"temp/weather/{os.listdir('temp/weather')[0]}", skiprows=6)
    weather_past_DF = weather_past_DF[['Местное время в Астане', 'T', 'Ff', 'U']]

    weather_past_DF.rename({
        'Местное время в Астане': 'Date',
        'T': 'Temperature',
        'Ff': 'wind_speed_values',
        'U': 'humidity_values'
    }, axis=1, inplace=True)

    weather_past_DF['Date'] = pd.to_datetime(weather_past_DF['Date'], dayfirst=True)
    weather_past_DF['Time'] = (weather_past_DF['Date'].dt.time).astype(str)
    
    time_to_filter = ['08:00:00', '11:00:00', '14:00:00', '17:00:00', '20:00:00']
    weather_past_DF = weather_past_DF[weather_past_DF['Time'].isin(time_to_filter)]
    weather_past_DF['Date'] = weather_past_DF['Date'].dt.date
    weather_past_DF = weather_past_DF.drop('Time', axis=1)

    weather_past_DF = weather_past_DF.groupby('Date').mean().reset_index()
    
    return weather_past_DF

In [40]:
predict_DF['Trend'] = predict_DF[['Sales_21', 'Sales_14', 'Sales_7']].apply(calculate_trend, axis=1)

In [41]:
weather_past_DF = parse_weather_past()
weather_past_DF['Date'] = pd.to_datetime(weather_past_DF['Date'], dayfirst=True)

In [42]:
# weather_DF = parse_weather_forecast()
# predict_DF = predict_DF.merge(weather_DF, on='Date', how='left')

### MATH MODEL FORMULA

In [43]:
predict_DF.fillna(0, inplace=True)

In [46]:
def categorize_sales(value):
    if value == 0:
        return 'A'
    elif 0 < value <= 10:
        return 'B'
    elif 11 < value <= 20:
        return 'C'
    elif 21 < value <= 30:
        return 'D'
    elif 31 < value:
        return 'E'
    else:
        return None
    
predict_DF['Sales_7_category'] = predict_DF['Sales_7'].apply(categorize_sales)

In [62]:
def increase_predict(data):
    if data['Sales_7_category'] == 'A':
        return 1
    
    elif data['Sales_7_category'] == 'B':
        return data['Predict'] + 1
    
    elif data['Sales_7_category'] == 'C':
        return data['Predict'] + 2
    
    elif data['Sales_7_category'] == 'D':
        return data['Predict'] * 1.1
    
    elif data['Sales_7_category'] == 'E':
        return data['Predict'] * 1.08
    
    
def decrease_predict(data):
    if data['Sales_7_category'] == 'A':
        return 1

    elif data['Sales_7_category'] == 'B':
        return data['Predict'] - 1
    
    elif data['Sales_7_category'] == 'C':
        return data['Predict'] - 2
    
    elif data['Sales_7_category'] == 'D':
        return data['Predict'] / 1.1
    
    elif data['Sales_7_category'] == 'E':
        return data['Predict'] / 1.08

In [77]:
predict_DF.shape

(27245, 35)

In [76]:
# TREND UP, RETURNS UP, PREDICT UP
condition = (predict_DF['Trend'] > 0) & (predict_DF['Returns_percentage'] > 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(increase_predict, axis=1)

print(predict_DF[condition].shape)

(947, 35)


In [78]:
# TREND DOWN, RETURNS DOWN, PREDICT UP
condition = (predict_DF['Trend'] < 0) & (predict_DF['Returns_percentage'] < 0.08)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(increase_predict, axis=1)

print(predict_DF[condition].shape)

(7559, 35)


In [79]:
# TREND UP, RETURNS DOWN, PREDICT UP
condition = (predict_DF['Trend'] > 0) & (predict_DF['Returns_percentage'] < 0.08)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(increase_predict, axis=1)

print(predict_DF[condition].shape)

(6585, 35)


In [80]:
# TREND DOWN, RETURNS UP, PREDICT UP
condition = (predict_DF['Trend'] < 0) & (predict_DF['Returns_percentage'] > 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(decrease_predict, axis=1)

print(predict_DF[condition].shape)

(1243, 35)


In [88]:
# NO TREND, RETURNS 8-10%, REMAINS THE SAME
condition = (predict_DF['Trend'] == 0) & (predict_DF['Returns_percentage'] >= 0.08) & (predict_DF['Returns_percentage'] <= 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF.loc[condition, 'Delivered_7']

predict_DF.loc[condition].shape

(9, 36)

In [90]:
# NO TREND, RETURNS DOWN, PREDICT UP
condition = (predict_DF['Trend'] == 0) & (predict_DF['Returns_percentage'] < 0.08)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(increase_predict, axis=1)

predict_DF.loc[condition].shape

(10002, 36)

In [91]:
# NO TREND, RETURNS UP, PREDICT DOWN
condition = (predict_DF['Trend'] == 0) & (predict_DF['Returns_percentage'] > 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(decrease_predict, axis=1)

predict_DF.loc[condition].shape

(859, 36)

In [92]:
# TREND UP, RETURNS 8-10%, PREDICT UP
condition = (predict_DF['Trend'] > 0) & (predict_DF['Returns_percentage'] >= 0.08) & (predict_DF['Returns_percentage'] <= 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(increase_predict, axis=1)

predict_DF.loc[condition].shape

(38, 36)

In [93]:
# TREND UP, RETURNS 8-10%, PREDICT UP
condition = (predict_DF['Trend'] < 0) & (predict_DF['Returns_percentage'] >= 0.08) & (predict_DF['Returns_percentage'] <= 0.1)
predict_DF.loc[condition, 'Predict'] = predict_DF[condition][['Predict', 'Sales_7_category']].apply(decrease_predict, axis=1)

predict_DF.loc[condition].shape

(3, 36)