# Analysis of CiCs that are offline

In [215]:
from urllib.parse import urlparse
from dotenv import load_dotenv
import os
from pathlib import Path
import json
import redis
import pandas as pd # only if needed for filtering of data
import mysql.connector
from mysql.connector import Error
from datetime import datetime, timedelta
import pytz

In [216]:
# load .env file
env_path = Path('../') / '.env'
load_dotenv(env_path)

True

## Get data from MySql

In [217]:
mysql_url = os.getenv('MYSQLPROD')
parsed_mysql_url = urlparse(mysql_url)

query = """
        SELECT cic.id, cic.wifiConnectionStatus, cic.orderNumber, cic.createdAt, cic.updatedAt, 
        cicCommissioning.completedAt as commissioningCompletedAt, cicCommissioning.identificationTestStartAt
        FROM cic
        LEFT JOIN cicCommissioning ON cic.id = cicCommissioning.cicId
        """

try:
    connection = mysql.connector.connect(host=parsed_mysql_url.hostname,
                                         user=parsed_mysql_url.username,
                                         password=parsed_mysql_url.password,
                                         database=parsed_mysql_url.path[1:],
                                         port=parsed_mysql_url.port)
    if connection.is_connected():
        db_Info = connection.get_server_info()
        print("Connected to MySQL Server version ", db_Info)
        cursor = connection.cursor()
        cursor.execute(query)
        fields = [field_md[0] for field_md in cursor.description]
        result = [dict(zip(fields,row)) for row in cursor.fetchall()]
        print(result)

except Error as e:
    print("Error while connecting to MySQL", e)
finally:
    if connection.is_connected():
        cursor.close()
        connection.close()
        print("MySQL connection is closed")

Connected to MySQL Server version  8.0.28
[{'id': 'CIC-00149d9a-da31-5e61-844d-3e818b8a2ded', 'wifiConnectionStatus': 'connected', 'orderNumber': 'QUATT3293', 'createdAt': datetime.datetime(2023, 3, 17, 1, 39, 6, 973000), 'updatedAt': datetime.datetime(2023, 7, 16, 1, 59, 38, 410000), 'commissioningCompletedAt': datetime.datetime(2023, 6, 9, 11, 35, 47, 210000), 'identificationTestStartAt': datetime.datetime(2023, 6, 9, 11, 23, 4, 226000)}, {'id': 'CIC-00397c56-d471-5a15-a964-56191a52c44f', 'wifiConnectionStatus': 'disconnected', 'orderNumber': None, 'createdAt': datetime.datetime(2023, 3, 20, 8, 46, 33, 360000), 'updatedAt': datetime.datetime(2023, 4, 1, 11, 8, 14, 36000), 'commissioningCompletedAt': None, 'identificationTestStartAt': None}, {'id': 'CIC-0043864c-e94c-59d1-9f86-8193abf8878c', 'wifiConnectionStatus': 'disconnected', 'orderNumber': 'QUATT4567', 'createdAt': datetime.datetime(2023, 6, 27, 20, 32, 58, 595000), 'updatedAt': datetime.datetime(2023, 7, 14, 14, 49, 15, 678000)

## Exploratory data analysis

In [218]:
# Convert to Pandas DataFrame
df_sql = pd.DataFrame.from_dict(result)

# Make all order numbers upper case
df_sql['orderNumber'] = df_sql['orderNumber'].str.upper()

# Filter out test CiCs
df_sql_orders = df_sql[~pd.isna(df_sql['orderNumber']) & ~df_sql['orderNumber'].str.upper().isin(['QUATT0000', 'QUATT00000'])].copy()# [['id', 'orderNumber', 'lastSeen']]'

print('Complete dataset')
print(df_sql_orders)

# Identify Order numbers that have multiple CiCs
df_sql_orders['cic_count'] = df_sql_orders.groupby('orderNumber')['id'].transform('count')
print('Order numbers that have multiple CiCs')
print(df_sql_orders[df_sql_orders['cic_count'] != 1][['id', 'orderNumber', 'cic_count', 'commissioningCompletedAt', 'identificationTestStartAt']].sort_values(
    by=['cic_count', 'orderNumber', 'commissioningCompletedAt', 'identificationTestStartAt'], ascending=False))

# Order numbers that have only one CiC for which the last commissioning has not been completed
print('Order numbers that have only one CiC for which the last commissioning has not been completed')
print(df_sql_orders[pd.isna(df_sql_orders['commissioningCompletedAt']) & df_sql_orders['cic_count'] == 1][['id', 'orderNumber', 'cic_count', 'commissioningCompletedAt']])

# Order numbers that have multiple CiCs, of which, the last commissioning has not been completed
df_sql_latest_duplicates = df_sql_orders[df_sql_orders['cic_count'] != 1].sort_values(
    by=['cic_count', 'orderNumber', 'identificationTestStartAt', 'commissioningCompletedAt'], ascending=False).groupby(
        'orderNumber').first().reset_index()
print('Order numbers that have multiple CiCs, of which, the last commissioning has not been completed')
print(df_sql_latest_duplicates[pd.isna(df_sql_latest_duplicates['commissioningCompletedAt'])][['cic_count', 'orderNumber', 'identificationTestStartAt', 'commissioningCompletedAt']])


Complete dataset
                                            id wifiConnectionStatus  \
0     CIC-00149d9a-da31-5e61-844d-3e818b8a2ded            connected   
2     CIC-0043864c-e94c-59d1-9f86-8193abf8878c         disconnected   
4     CIC-008396fd-1fe4-5812-b410-91ac000e439c         disconnected   
5     CIC-0084fc13-f765-505f-ac5f-ecfd36eab409         disconnected   
8     CIC-00ca4ee0-d7df-5bc1-b575-c90aa7ab3046         disconnected   
...                                        ...                  ...   
2963  CIC-ff7018b8-e34c-5fc3-99c5-2060c1001858            connected   
2965  CIC-ff9155b8-30a7-51ea-bad2-fee0ff5eacef         disconnected   
2967  CIC-ffd3bdd6-fd98-54cc-ab62-6a89aa0e16fd         disconnected   
2968  CIC-ffd4679f-365a-5536-8817-62509ce33977         disconnected   
2969  CIC-ffe56568-3bf3-5943-ac2c-ad4461aa6144            connected   

     orderNumber               createdAt               updatedAt  \
0      QUATT3293 2023-03-17 01:39:06.973 2023-07-16 01:59:38.4

## Find active CiCs
MySql contains data of all CiC's in production. However some CiC's are not linked to a customer (for examplem test CiC's), while others CiC's are old CiCs that have been replaced. These CiCs have to be filtered out from the dataset

In [219]:
# Filter out test CiCs
df_active_cics = df_sql[~pd.isna(df_sql['orderNumber']) & ~df_sql['orderNumber'].str.upper().isin(['QUATT0000', 'QUATT00000'])].copy()

# For order numbers with multiple CiCs, take the one with the latest commissioningCompletedAt
df_active_cics['cicCount'] = df_active_cics.groupby('orderNumber')['id'].transform('count')
df_active_cics = df_active_cics.sort_values(
    by=['cicCount', 'orderNumber', 'commissioningCompletedAt', 'identificationTestStartAt'], ascending=False).groupby(
        'orderNumber').first().reset_index()

print(df_active_cics)

     orderNumber                                        id  \
0      QUATT0513  CIC-38ab1baf-c634-53e4-834f-b5a954de688c   
1        QUATT10  CIC-fc1e25fb-a977-59bb-995f-cc369cf21ddf   
2      QUATT1001  CIC-d985ad09-772c-56b8-9520-ba007c4d8c28   
3      QUATT1002  CIC-7901ae8b-294a-5e3a-a3fa-5c08b5b7c1d7   
4      QUATT1005  CIC-fe449100-7754-5949-b9a5-c88b0314fc7e   
...          ...                                       ...   
2053    QUATT984  CIC-00ca4ee0-d7df-5bc1-b575-c90aa7ab3046   
2054    QUATT985  CIC-739880c5-9c80-50a8-8b71-e9e0498ee340   
2055     QUATT99  CIC-0137b9d6-01b3-541f-84c5-21127178b6f2   
2056    QUATT997  CIC-92fa0642-2e98-5a57-a5a9-f2a7d82c8993   
2057    QUATT999  CIC-c72512f1-a7f7-50b4-8af9-8be5d2dcef30   

     wifiConnectionStatus               createdAt               updatedAt  \
0               connected 2023-06-24 23:05:44.099 2023-07-15 17:04:36.273   
1            disconnected 2022-12-02 10:54:04.731 2023-06-09 06:21:43.367   
2               connecte

### Getting CiC info from Redis

In [220]:
def get_cic_stats_from_redis(redis_url):
    # Connect to Redis database
    parsed_url = urlparse(redis_url)
    r = redis.Redis(host=parsed_url.hostname,
                    port=parsed_url.port,
                    db=0,
                    password=parsed_url.password,
                    username=parsed_url.username)
    
    # get objects from redis
    redis_keys = r.keys(pattern="cic:*CIC*lastStat*")
    redis_objects = r.mget(redis_keys) 
    results = []
    for key, obj in zip(redis_keys, redis_objects):
        try:
            json_obj = json.loads(obj.decode())
            json_obj['redisKeyId'] = key.decode()[4:-9]
            results.append(json_obj)
        except:
            pass
    return results

In [221]:
# get cic data from redis
REDIS_URL = os.getenv("REDISPROD")

redis_data = get_cic_stats_from_redis(REDIS_URL)

df_redis = pd.json_normalize(redis_data)
print(df_redis.shape)

# # select list of cic ids which are connected via Lte
# cic_ids = df_redis[(df_redis['system.quattBuild'] != "2.0.1")][['system.quattId', 'system.quattBuild','time.tsHuman', 'system.isLteConnected','system.isWifiReachable','system.isEthernetReachable']]

# # print list of cic ids
# print(cic_ids)

(2183, 382)


## Join SQL and redis data

In [222]:
# df_active_last_stat = df_redis.set_index('system.quattId').join(df_active_cics.set_index('id'), how='left').reset_index()
df_active_last_stat = df_active_cics.set_index('id').join(df_redis.set_index('redisKeyId'), how='left').reset_index()
print(df_active_last_stat)

                                            id orderNumber  \
0     CIC-38ab1baf-c634-53e4-834f-b5a954de688c   QUATT0513   
1     CIC-fc1e25fb-a977-59bb-995f-cc369cf21ddf     QUATT10   
2     CIC-d985ad09-772c-56b8-9520-ba007c4d8c28   QUATT1001   
3     CIC-7901ae8b-294a-5e3a-a3fa-5c08b5b7c1d7   QUATT1002   
4     CIC-fe449100-7754-5949-b9a5-c88b0314fc7e   QUATT1005   
...                                        ...         ...   
2053  CIC-00ca4ee0-d7df-5bc1-b575-c90aa7ab3046    QUATT984   
2054  CIC-739880c5-9c80-50a8-8b71-e9e0498ee340    QUATT985   
2055  CIC-0137b9d6-01b3-541f-84c5-21127178b6f2     QUATT99   
2056  CIC-92fa0642-2e98-5a57-a5a9-f2a7d82c8993    QUATT997   
2057  CIC-c72512f1-a7f7-50b4-8af9-8be5d2dcef30    QUATT999   

     wifiConnectionStatus               createdAt               updatedAt  \
0               connected 2023-06-24 23:05:44.099 2023-07-15 17:04:36.273   
1            disconnected 2022-12-02 10:54:04.731 2023-06-09 06:21:43.367   
2               connecte

## Missing system.quattId in Redis

In [223]:
df_redis[~pd.isna(df_redis['system.quattId'])][['time.tsHuman', 'system.cpuTemp', 'system.quattId', 'system.quattBuild']]
print('CiCs that report quattId in stat')
print(df_redis[~pd.isna(df_redis['system.quattId'])]['system.quattBuild'].value_counts())
print('\nCiCs that do NOT report quattId in stat')
print(df_redis[pd.isna(df_redis['system.quattId'])]['system.quattBuild'].value_counts())

CiCs that report quattId in stat
system.quattBuild
2.0.1                  312
2.0.3                   40
1.2.6                   23
2.0.4                   14
1.2.5                   14
0.1.3                    5
0.0.28                   4
0.01.03                  3
0.0.44                   2
0.00.57                  2
01.02.06-production      2
01.02.05-factory         2
2.0.2                    2
0.0.57                   1
0.0.53                   1
0.00.44                  1
1.1.0                    1
2.0.0                    1
Name: count, dtype: int64

CiCs that do NOT report quattId in stat
system.quattBuild
2.0.4    1723
2.0.3       7
2.0.1       3
2.1.0       1
Name: count, dtype: int64


## Analyse connectivity

In [224]:
df_connectivity = df_active_last_stat[['id', 'orderNumber', 'system.quattBuild','time.tsHuman','system.cpuTemp', 'system.isWifiReachable','system.isEthernetReachable', 'system.isLteReachable',
                                       'system.isWifiConnected','system.isEthernetConnected', 'system.wifiConnectedSsid']].copy()
df_connectivity['time.tsHuman'] = pd.to_datetime(df_connectivity['time.tsHuman'])
df_connectivity['isCiCInLteMode'] = [True if (row['system.isLteReachable'] and not row['system.isWifiReachable'] and not row['system.isEthernetReachable']) else False for idx, row in df_connectivity.iterrows()]
print(df_connectivity.sort_values(by='time.tsHuman'))
df_connectivity['timeSinceLastStat'] = datetime.now(pytz.utc) - df_connectivity['time.tsHuman']
df_connectivity['daysSinceLastStat'] = df_connectivity['timeSinceLastStat'].transform(lambda x: x.days)
print(df_connectivity.sort_values(by='time.tsHuman'))

                                            id orderNumber system.quattBuild  \
552   CIC-87b78669-f0fa-552a-a27c-5a4d723ac433    QUATT245           0.00.44   
912   CIC-6b3e6981-c0f1-5447-bbdb-147b43f36af1   QUATT3489               NaN   
1303  CIC-0e800821-26da-5930-925b-edbe042b6b7e   QUATT4847             1.2.6   
911   CIC-a27f947b-2364-57f8-9c8b-d784ba3aa035   QUATT3486             1.2.6   
1939  CIC-4cc4ab83-1687-59ec-aa4c-2066e204d71e    QUATT776             2.0.1   
...                                        ...         ...               ...   
625   CIC-892f883d-5bd3-56d3-bce4-c4846887e3ff   QUATT2656             2.0.1   
229   CIC-dafd40e4-3375-5313-90dd-cc02be75d5a6   QUATT1596               NaN   
231   CIC-8e4cc0b5-a44a-555b-94b8-856b081317ce     QUATT16               NaN   
776   CIC-2a510813-a545-5921-81c8-1380db3120c6   QUATT3080               NaN   
1236  CIC-0043864c-e94c-59d1-9f86-8193abf8878c   QUATT4567               NaN   

                         time.tsHuman  

In [257]:
df_offline = df_connectivity[df_connectivity['daysSinceLastStat'] >= 1].copy()
df_online = df_connectivity[df_connectivity['daysSinceLastStat'] < 1 ].copy()

# Remove Time zone
df_offline['time.tsHuman'] = df_offline['time.tsHuman'].dt.tz_localize(None)
df_online['time.tsHuman'] = df_online['time.tsHuman'].dt.tz_localize(None)

print(df_offline.shape)
print(df_online.shape)

(61, 14)
(1993, 14)


In [258]:
print(f"CiCs that are completely offline = {df_offline.shape[0]}")
cicsInLteMode = df_online[df_online['isCiCInLteMode'] == True]

# Make sub-groups of CiCs in LTE mode
print(f"CiCs in LTE mode = {cicsInLteMode.shape[0]}")
cicsInLteModeWithCableOnly = df_online[(df_online['isCiCInLteMode'] == True) & (df_online['system.isEthernetConnected'] == True) & (pd.isna(df_online['system.wifiConnectedSsid']))]
print(f"\tCiCs in LTE mode with only cable connected (probably ipv6 issue) = {cicsInLteModeWithCableOnly.shape[0]}")
cicsInLteModeWithWifiSsidOnly = df_online[(df_online['isCiCInLteMode'] == True) & ~(pd.isna(df_online['system.wifiConnectedSsid'])) & (df_online['system.isEthernetConnected'] == False)]
print(f"\tCiCs in LTE mode with only wifi ssid (probably wifi issue) = {cicsInLteModeWithWifiSsidOnly.shape[0]}")
cicsInLteModeNoCableOrWifi = df_online[(df_online['isCiCInLteMode'] == True) & (pd.isna(df_online['system.wifiConnectedSsid'])) & (df_online['system.isEthernetConnected'] == False)]
print(f"\tCiCs in LTE mode with no cable connected and no wifi credentials = {cicsInLteModeNoCableOrWifi.shape[0]}")
cicsInLteModeCableAndWifi = df_online[(df_online['isCiCInLteMode'] == True) & ~(pd.isna(df_online['system.wifiConnectedSsid'])) & (df_online['system.isEthernetConnected'] == True)]
print(f"\tCiCs in LTE mode with cable connected and wifi credentials = {cicsInLteModeCableAndWifi.shape[0]}")

CiCs that are completely offline = 61
CiCs in LTE mode = 390
	CiCs in LTE mode with only cable connected (probably ipv6 issue) = 71
	CiCs in LTE mode with only wifi ssid (probably wifi issue) = 255
	CiCs in LTE mode with no cable connected and no wifi credentials = 49
	CiCs in LTE mode with cable connected and wifi credentials = 15


In [259]:
cicsInLteModeWithWifiSsidOnly[['id', 'isCiCInLteMode', 'system.wifiConnectedSsid', 'system.isEthernetConnected']]

Unnamed: 0,id,isCiCInLteMode,system.wifiConnectedSsid,system.isEthernetConnected
7,CIC-9048e9b8-ca9a-5525-9e0b-5a2b503a412e,True,DecoE4netw,False
23,CIC-c79e3db2-3982-5380-be58-df084d97aa11,True,H369A2FF008,False
26,CIC-07a5b3a4-f626-5f7a-a70d-23b5ae2ffc2c,True,google_mvh,False
37,CIC-d950fd60-f2b9-53ec-b7d5-bc8b2ea06588,True,Bleijehuis,False
52,CIC-306e94f5-2263-5e48-bea9-d906bd7adad7,True,kvtm,False
...,...,...,...,...
1998,CIC-a2bf36fe-2f60-5791-9bc9-96afea620ec4,True,KHD1,False
2015,CIC-5b03072d-b848-5cf4-89c6-449ffdb6faf5,True,PL-RS-RES-IoT,False
2036,CIC-9482c26f-47b8-58fa-9e01-911623b3a0e8,True,H369AD43D88,False
2040,CIC-c82d5a41-6b47-5430-877d-9939221c5d73,True,JANDM2,False


## Export Data

In [260]:
with pd.ExcelWriter(f"{datetime.today().strftime('%Y-%m-%d')}_connectivity_analysis.xlsx") as writer:
    df_offline.to_excel(writer, sheet_name='Offline', index=False)
    cicsInLteMode.to_excel(writer, sheet_name='LTE mode', index=False)
    cicsInLteModeWithCableOnly.to_excel(writer, sheet_name='LTE mode + cable only', index=False)
    cicsInLteModeWithWifiSsidOnly.to_excel(writer, sheet_name='LTE mode + WiFi only', index=False)
    cicsInLteModeCableAndWifi.to_excel(writer, sheet_name='LTE mode + cable + WiFi', index=False)
    cicsInLteModeNoCableOrWifi.to_excel(writer, sheet_name='LTE mode no Cable No WiFi', index=False)

## Analysis summary
This summary is based on data collected on 17-07-2023.

The CiCs with connectivity issues can be split into two groups - CiCs that are in LTE mode, and CiC's that are completely offline.

### CiCs in LTE mode
There are 390 CiCs that are currently publishing data in LTE mode. This group can be split into 4 mutually exclusive groups based on the connectivity status of the two other interfaces - whether there is a cable currently connected to the CiC and whether the CiC has a WiFi credential (indicating it was connected to WiFi in the past).
#### CiCs with a WiFi credential and a cable connected
There are 15 CiCs that are publishing data in LTE mode while having a cable connected and also having a WiFi credential. These CiCs are most likely receiving only an inet6 address.
#### CiCs with a WiFi credential and no cable connected
There are 255 CiCs that are publishing data in LTE mode which do have a WiFi credential saved but no ethernet cable. These CiCs may be suffering from one or more of the following issues:
- Known WiFi issue
- CiC only receiving an inet6 address
- Incorrect WiFi credential
- Unknown WiFi issue?
#### CiCs with a cable connected and no WiFi credential
There are 71 CiCs that are publishing data in LTE mode which do have a connected cable but no WiFi credential saved. These CiCs are most likely receiving only an inet6 address.
#### CiCs with no cable connected and no WiFi credential
There are 15 CiCs that are publishing data in LTE mode which have neither a connected cable nor a WiFi credential saved. It is not clear what may have causes these issues. Some possible explanations might include:
- Installer used own hotpsot during commissioning and customer did not connect the CiC to his network yet
- WiFi credentials were not saved in the CiC for an unknown reason


### CiCs that are completely offline
There are 61 CiCs that are completely offline. For these CiCs it is not possible to identify what might be causing the connectivity issues. We might have to ask these customers to power cycle their CiCs in the hopes that they come online, even if only on LTE.