In [14]:
import pandas as pd
import transform

# Load your data
df = data=transform.Transform().livestock
df = df[df['Commodity_Description'] == 'Animal Numbers, Cattle']
df = transform.Transform().transform_commodity_by_export(data=df)


   Commodity_Description  Calendar_Year Attribute_Description     Value  \
3          Meat, Chicken           2018               Exports   60841.0   
12         Meat, Chicken           2019               Exports     148.0   
21         Meat, Chicken           2020               Exports   12239.0   
30         Meat, Chicken           2021               Exports   46631.0   
39         Meat, Chicken           2022               Exports  115823.0   

                    Origin  
3   United States, America  
12  United States, America  
21  United States, America  
30  United States, America  
39  United States, America  


In [11]:
df.head()

Unnamed: 0,Commodity_Description,Calendar_Year,Attribute_Description,Value,Origin
2079,"Meat, Chicken",2018,Exports,60841.0,"United States, America"
2088,"Meat, Chicken",2019,Exports,148.0,"United States, America"
2097,"Meat, Chicken",2020,Exports,12239.0,"United States, America"
2106,"Meat, Chicken",2021,Exports,46631.0,"United States, America"
2115,"Meat, Chicken",2022,Exports,115823.0,"United States, America"


In [12]:
import matplotlib.pyplot as plt
from prophet import Prophet

# Rename columns to fit Prophet's requirements
df.rename(columns={'Calendar_Year': 'ds', 'Value': 'y'}, inplace=True)
# Ensure the date column is parsed as yearly data
df['ds'] = pd.to_datetime(df['ds'], format='%Y')

# Initialize and fit the Prophet model
model = Prophet()
model.fit(df)

# Create a DataFrame for future dates (including historical dates)
future = model.make_future_dataframe(periods=0, freq='Y')

# Make predictions
forecast = model.predict(future)

# Extract the trend component
trend = forecast[['ds', 'trend']]

# Calculate residuals
df['trend'] = trend['trend']
df['residual'] = df['y'] - df['trend']

19:30:32 - cmdstanpy - INFO - Chain [1] start processing
19:30:32 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


In [13]:
df.head()

Unnamed: 0,Commodity_Description,ds,Attribute_Description,y,Origin,trend,residual
2079,"Meat, Chicken",2018-01-01,Exports,60841.0,"United States, America",,
2088,"Meat, Chicken",2019-01-01,Exports,148.0,"United States, America",,
2097,"Meat, Chicken",2020-01-01,Exports,12239.0,"United States, America",,
2106,"Meat, Chicken",2021-01-01,Exports,46631.0,"United States, America",,
2115,"Meat, Chicken",2022-01-01,Exports,115823.0,"United States, America",,


Step 2: Identify Trends
To identify periods of increasing or decreasing trends, you can calculate the first derivative of the trend component. This will give you the rate of change, which you can use to identify periods of increase or decrease.

In [4]:
# Calculate the first derivative of the trend
trend['trend_diff'] = trend['trend'].diff()

# Identify periods of increase or decrease
trend['trend_direction'] = trend['trend_diff'].apply(lambda x: 'Increase' if x > 0 else 'Decrease' if x < 0 else 'No Change')

# Identify consecutive periods of increase or decrease
trend_periods = trend.groupby((trend['trend_direction'] != trend['trend_direction'].shift()).cumsum()).agg({
    'ds': ['first', 'last'],
    'trend_direction': 'first'
}).reset_index(drop=True)

# Rename columns
trend_periods.columns = ['Start', 'End', 'Direction']

Step 3: Identify Consistency and Volatility
To identify periods of consistency and volatility, calculate the rolling standard deviation of the residuals.

In [5]:
# Calculate rolling standard deviation of residuals
df['rolling_std'] = df['residual'].rolling(window=12).std()  # 12-month rolling window

# Define thresholds for high and low volatility
high_volatility_threshold = df['rolling_std'].quantile(0.75)  # 75th percentile
low_volatility_threshold = df['rolling_std'].quantile(0.25)   # 25th percentile

# Identify periods of high and low volatility
df['volatility'] = df['rolling_std'].apply(lambda x: 'High' if x > high_volatility_threshold else 'Low' if x < low_volatility_threshold else 'Medium')

# Identify consecutive periods of high or low volatility
volatility_periods = df.groupby((df['volatility'] != df['volatility'].shift()).cumsum()).agg({
    'ds': ['first', 'last'],
    'volatility': 'first'
}).reset_index(drop=True)

# Rename columns
volatility_periods.columns = ['Start', 'End', 'Volatility']

Combine Results
Combine the results of trends and volatility analysis to get a comprehensive view of the time points when these events occur.

In [6]:
# Combine trend and volatility periods
analysis_results = pd.concat([trend_periods, volatility_periods], axis=0).sort_values(by='Start')

# Print results
print(analysis_results)


                          Start                           End  Direction  \
0 1970-01-01 00:00:00.000002018 1970-01-01 00:00:00.000002018  No Change   
0 1970-01-01 00:00:00.000002018 1970-01-01 00:00:00.000002022        NaN   
1 1970-01-01 00:00:00.000002019 1970-01-01 00:00:00.000002022   Increase   

  Volatility  
0        NaN  
0     Medium  
1        NaN  


Explanation
Trend Analysis: The trend component is analyzed to identify periods of increasing or decreasing trends.
Volatility Analysis: The rolling standard deviation of the residuals is calculated to identify periods of high and low volatility.
Combine Results: The results from both analyses are combined and sorted by the start date to give a comprehensive view of when trends, consistency, and volatility occur.
This approach allows you to pinpoint specific time points (years) when certain patterns or behaviors are present in your time series data.