# XmR Chart Example

This notebook demonstrates the usage of the spc_plotly package with sample vehicle count data.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from spc_plotly import xmr

# Set random seed for reproducibility
np.random.seed(475)

## Make data

In [2]:
# Generate 100 days of data
start_date = datetime(2024, 1, 1)
dates = [start_date + timedelta(days=x) for x in range(100)]

# Generate vehicle counts with some patterns:
# - Base level around 1000 vehicles
# - Gradual upward trend
# - Some random variation
# - A few anomalies

base_counts = 1000
trend = np.linspace(0, 200, 100)  # Gradual increase over time
variation = np.random.normal(0, 50, 100)  # Random daily variation
weekly_pattern = np.tile([50, 30, 0, -20, -30, -80, -100], 15)[:100]  # Weekly patterns

# Combine components
vehicle_counts = base_counts + trend + variation + weekly_pattern

# Add some anomalies
vehicle_counts[25] += 300  # Special event
vehicle_counts[50:55] += 200  # Construction period
vehicle_counts[80] -= 400  # Road closure

# Create DataFrame
data = pd.DataFrame({
    'date': dates,
    'vehicle_counts': vehicle_counts.astype(int)
})

# Display first few rows
data.head()

Unnamed: 0,date,vehicle_counts
0,2024-01-01,942
1,2024-01-02,1033
2,2024-01-03,1023
3,2024-01-04,983
4,2024-01-05,1011


## Date example

In [3]:
# Create XmR chart
xmr_chart = xmr.XmR(
    data=data,
    x_type='date_time',
    x_ser_name='date',
    y_ser_name='vehicle_counts',
    date_part_resolution='day',
    title='Daily Vehicle Counts - XmR Chart',
    period_breaks = ['2024-02-01']
)

# Display the chart
xmr_chart.xmr_chart

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [4]:
pd.to_datetime(data.date)

0    2024-01-01
1    2024-01-02
2    2024-01-03
3    2024-01-04
4    2024-01-05
        ...    
95   2024-04-05
96   2024-04-06
97   2024-04-07
98   2024-04-08
99   2024-04-09
Name: date, Length: 100, dtype: datetime64[ns]

In [5]:
_x_Ser[0]

NameError: name '_x_Ser' is not defined

In [6]:
l = ['a','b','c','d']

In [7]:
i = [5,6,7,8,9,'b']

In [8]:
i.index('b')

5

In [9]:
period_breaks = ['2024-02-01', '2024-03-12']

In [10]:
# Calculate period ranges
period_ranges = []
start_date = _x_Ser.min()
end_date = _x_Ser.max()

In [12]:
if 'timestamp' in str(type(start_date)):
    period_ranges.append((start_date, pd.to_datetime(period_breaks[0])))
    for i in range(len(period_breaks) - 1):
        period_ranges.append((pd.to_datetime(period_breaks[i]), pd.to_datetime(period_breaks[i + 1])))
    period_ranges.append((pd.to_datetime(period_breaks[-1]), end_date))
else:
    period_ranges.append((start_date, period_breaks[0]))
    for i in range(len(period_breaks) - 1):
        period_ranges.append((period_breaks[i], period_breaks[i + 1]))
    period_ranges.append((period_breaks[-1], end_date))

In [13]:
period_ranges

[(Timestamp('2024-01-01 00:00:00'), Timestamp('2024-02-01 00:00:00')),
 (Timestamp('2024-02-01 00:00:00'), Timestamp('2024-03-12 00:00:00')),
 (Timestamp('2024-03-12 00:00:00'), Timestamp('2024-04-09 00:00:00')),
 (Timestamp('2024-01-01 00:00:00'), Timestamp('2024-02-01 00:00:00')),
 (Timestamp('2024-02-01 00:00:00'), Timestamp('2024-03-12 00:00:00')),
 (Timestamp('2024-03-12 00:00:00'), Timestamp('2024-04-09 00:00:00'))]

In [19]:
_x_Ser[_x_Ser == period_ranges[1][0]]

31   2024-02-01
Name: date, dtype: datetime64[ns]

In [14]:
# Calculate limits for each period - changing this to assume ordered. 
period_results = []
for start, end in period_ranges:
    period_data = data.loc[int(_x_Ser[_x_Ser == start].index[0]):int(_x_Ser[_x_Ser == end].index[0])]
    print(period_data)

         date  vehicle_counts
0  2024-01-01             942
1  2024-01-02            1033
2  2024-01-03            1023
3  2024-01-04             983
4  2024-01-05            1011
5  2024-01-06             883
6  2024-01-07             859
7  2024-01-08            1033
8  2024-01-09            1058
9  2024-01-10             981
10 2024-01-11            1056
11 2024-01-12             987
12 2024-01-13             862
13 2024-01-14             820
14 2024-01-15            1118
15 2024-01-16            1020
16 2024-01-17            1058
17 2024-01-18             944
18 2024-01-19            1052
19 2024-01-20             965
20 2024-01-21             901
21 2024-01-22            1045
22 2024-01-23            1119
23 2024-01-24             966
24 2024-01-25            1002
25 2024-01-26            1340
26 2024-01-27             942
27 2024-01-28             953
28 2024-01-29            1181
29 2024-01-30            1054
30 2024-01-31            1130
31 2024-02-01            1046
         d

In [None]:
period

In [4]:
# Check the calculated limits and signals
print("Moving Range Limits:")
print(xmr_chart.mR_limit_values)
print("\nNatural Process Limits:")
print(xmr_chart.npl_limit_values)
print("\nDetected Signals:")
print(xmr_chart.signals)

Moving Range Limits:
{'mR_xmr_func': np.float64(91.1919191919192), 'mR_upper_limit': np.float64(298.0151919191919), 'xmr_func': 'mean'}

Natural Process Limits:
{'y_xmr_func': np.float64(1085.74), 'npl_upper_limit': np.float64(1328.310505050505), 'npl_lower_limit': np.float64(843.169494949495), 'xmr_func': 'mean'}

Detected Signals:
{'anomalies': [('2024-01-14', np.int64(820), 'Low'), ('2024-01-26', np.int64(1340), 'High'), ('2024-03-21', np.int64(703), 'Low')], 'long_runs': [[(Timestamp('2024-01-01 00:00:00'), np.int64(942), 'Low'), (Timestamp('2024-01-02 00:00:00'), np.int64(1033), 'Low'), (Timestamp('2024-01-03 00:00:00'), np.int64(1023), 'Low'), (Timestamp('2024-01-04 00:00:00'), np.int64(983), 'Low'), (Timestamp('2024-01-05 00:00:00'), np.int64(1011), 'Low'), (Timestamp('2024-01-06 00:00:00'), np.int64(883), 'Low'), (Timestamp('2024-01-07 00:00:00'), np.int64(859), 'Low'), (Timestamp('2024-01-08 00:00:00'), np.int64(1033), 'Low'), (Timestamp('2024-01-09 00:00:00'), np.int64(1058),

In [5]:
# Numeric example
numeric_data = pd.DataFrame({
    'position': range(1, 101),
    'measurement': np.random.normal(100, 10, 100)
})

xmr_numeric = xmr.XmR(
    data=numeric_data,
    x_ser_name='position',
    y_ser_name='measurement',
    x_type='numeric',
    chart_height=800
)

xmr_numeric.xmr_chart

In [6]:
# Categorical example
categories = ['Station_' + chr(ord('a') + i) for i in range(100)]
categorical_data = pd.DataFrame({
    'station': categories,
    'quality_score': np.random.normal(85, 5, 100)
}).sort_values('station')

xmr_categorical = xmr.XmR(
    data=categorical_data,
    x_ser_name='station',
    y_ser_name='quality_score',
    x_type='categorical',
    # show_grid=False
)

xmr_categorical.xmr_chart