In [1]:
import numpy as np 
import pandas as pd 
from scipy import stats
from scipy.stats import ks_2samp
import matplotlib.pyplot as plt
from statistics import mean
from statistics import pstdev

In [2]:
#import data 
#data format example: 2012 01 ship indicator, one file represents one port, 60X3 for each file


In [3]:
#matrices: this function will give general information of monthly, seasonal and yearly change of the input port
#monthly: month-(previous month) should contain n-1 (n is the total data number)
#seasonal: month-(last quarter) should contain around n-3
#yearly: month -last year month should contain n-12=
#input: only the third column of data if the data is regular 5 year of 12 months and is in sequence of time
#return: monthly_avg, monthly_std, seasonal_avg, seasonal_std, yearly_avg, yearly_std
def general_info_port(data):
    #initialization
    monthly=[]
    seasonal=[]
    yearly=[]
    #monthly
    for i in range(len(data)-1):
        monthly.append(data[i+1]-data[i])
  
    #seasonal
    for i in range(len(data)-3):
        seasonal.append(data[i+3]-data[i])
  
    #yearly
    for i in range(len(data)-12):
        yearly.append(data[i+12]-data[i])

    #return: monthly_avg, monthly_std, seasonal_avg, seasonal_std, yearly_avg, yearly_std
    return mean(monthly),pstdev(monthly), mean(seasonal),pstdev(seasonal), mean(yearly),pstdev(yearly)


In [4]:
#the function will give the predictive indicator for a certain date of the input port
#given a date, predict possible range of value of that date
#input: position of the desired month in the matrix, data itself(for getting all the mean and stds) 
#(e.g. if starts with 2012.01, and the desired month is 2013.01, then position = 12)
#please don't select month in 2012
#if the desired month is the next month, then position = current data length (60 in this case)
#return: monthly_predict, monthly_std, seasonal_predict, seasonal_std, yearly_predict, yearly_std
def prediction(position,data):
    monthly_avg, monthly_std, seasonal_avg, seasonal_std, yearly_avg, yearly_std=general_info_port(data[:,2])
    monthly_predict=data[position-1,2]+monthly_avg
    seasonal_predict=data[position-3,2]+seasonal_avg
    yearly_predict=data[position-12,2]+yearly_avg
    return monthly_predict,monthly_std,seasonal_predict, seasonal_std, yearly_predict, yearly_std

In [28]:
#compare real data to the prediction data, send alert if abnormal
#input: position, data to call prediction, true_value to compare with
#output: print alert if abnormal, else print normal
def abnormal_detection(position,data,true_value):
    monthly_predict,monthly_std,seasonal_predict, seasonal_std, yearly_predict, yearly_std=prediction(position,data)
    #std-tolerance is 1 currently, could be set to 2 if 1 is inappropriate
    std_tolerance=1
    #test if the true value is normal in the sense of monthly trend
    monthly_low=monthly_predict-monthly_std*std_tolerance
    monthly_high=monthly_predict+monthly_std*std_tolerance
    if true_value<monthly_low or true_value>monthly_high:
        print("Alert-monthly: was expecting ", monthly_low, " to ", monthly_high, ", but is ", true_value)
    else:
        print("Normal-monthly")

    #test if the true value is normal in the sense of seasonal trend
    seasonal_low=seasonal_predict-seasonal_std*std_tolerance
    seasonal_high=seasonal_predict+seasonal_std*std_tolerance
    if true_value<seasonal_low or true_value>seasonal_high:
        print("Alert-seasonal: was expecting ", seasonal_low, " to ", seasonal_high, ", but is ", true_value)
    else:
        print("Normal-seasonal")

    #test if the true value is normal in the sense of yearly trend
    yearly_low=yearly_predict-yearly_std*std_tolerance
    yearly_high=yearly_predict+yearly_std*std_tolerance
    if true_value<yearly_low or true_value>yearly_high:
        print("Alert-yearly: was expecting ", yearly_low, " to ", yearly_high, ", but is ", true_value)
    else:
        print("Normal-yearly")

In [45]:
#generate mock data: Number of ships for size small, medium and big
features=3
coefficient=100
year_number=5
data=np.around(np.random.rand(year_number*12,features)*coefficient)

#mock printing information about the given port
print(general_info_port(data[:,2]))

#mock prediction of next month
print(prediction(60,data))

# #mock abnormal detection
abnormal_detection(60,data,200)

(1.5254237288135593, 37.989452564842075, 0.21052631578947367, 41.19944508317543, 1.2291666666666667, 38.647574084784274)
(95.52542372881356, 37.989452564842075, 16.210526315789473, 41.19944508317543, 61.229166666666664, 38.647574084784274)
Alert-monthly: was expecting  57.53597116397149  to  133.51487629365565 , but is  200
Alert-seasonal: was expecting  -24.98891876738596  to  57.409971398964906 , but is  200
Alert-yearly: was expecting  22.58159258188239  to  99.87674075145094 , but is  200


In [46]:
def get_port_data(all_data, name):
    #data = all_data[name]
    features=3
    coefficient=100
    year_number=5
    data = np.around(np.random.rand(year_number*12,features)*coefficient)
    return data

In [47]:
def main():
    print("Select desired port to get information:")
    port_name = input()
    print("- Selected port:", port_name)
    port_data = get_port_data([], port_name)
    while True:
        print("Select one of the follow commands: 'port_info', 'prediction', 'abnormal_detection', or 'quit'")
        command = input()
        if command == 'port_info':
            monthly_avg, monthly_std, seasonal_avg, seasonal_std, yearly_avg, yearly_std = general_info_port(port_data[:,2])
            print("Monthly average:", monthly_avg)
            print("Monthly standard deviation:", monthly_std)
            print("Seasonal average:", monthly_avg)
            print("Seasonal standard deviation:", monthly_avg)
            print("Yearly average:", monthly_avg)
            print("Yearly standard deviation:", monthly_avg)
            print()
        elif command == 'prediction':
            while True:
                print("Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)")
                position = input()
                try:
                    year = int(position.split(".")[0])
                    month = int(position.split(".")[1])
                    if month > 12:
                        print("Invalid month...")
                    elif year < 2012:
                        print("Invalid year...")
                    else:
                        position = (year - 2012) * 12 + (month - 1)
                        monthly_predict,monthly_std,seasonal_predict, seasonal_std, yearly_predict, yearly_std = prediction(position, port_data)
                        print("Monthly predict:", monthly_predict)
                        print("Monthly standard deviation:", monthly_std)
                        print("Seasonal predict:", monthly_avg)
                        print("Seasonal standard deviation:", monthly_avg)
                        print("Yearly predict:", monthly_avg)
                        print("Yearly standard deviation:", monthly_avg)
                        print()
                        break
                except:
                    print("Invalid format")
        elif command == 'abnormal_detection':
            while True:
                try:
                    print("Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)")
                    position = input()
                    year = int(position.split(".")[0])
                    month = int(position.split(".")[1])
                    if month > 12:
                        print("Invalid month...")
                    elif year < 2012:
                        print("Invalid year...")
                    else:
                        print("Enter a true value to compare real data to the prediction data (must be a positive number):")
                        true_value = int(input())
                        position = (year - 2012) * 12 + (month - 1)
                        abnormal_detection(position, port_data, true_value)
                        break
                except:
                    print("Invalid format")
        elif command == 'quit':
            break
        else:
            print("Invalid command")
            

In [48]:
main()

Select desired port to get information:
l
- Selected port: l
Select one of the follow commands: 'port_info', 'prediction', 'abnormal_detection', or 'quit'
abnormal_detection
Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)
2018.07
Enter a true value to compare real data to the prediction data (must be a positive number):
200
Invalid format
Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)
quit
Invalid format
Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)
Invalid format
Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)
Invalid format
Enter a desired month in format YYYY.MM starting from 2012.01 (please don't select month in 2012)
2017.01
Enter a true value to compare real data to the prediction data (must be a positive number):
200
Alert-monthly: was expecting  32.07206366165528  to  124.368614