In [51]:
# install the modules on the OS
#!pip install influxdb

# import the modules
import pandas as pd
import datetime as dt
import numpy as np
import influxdb
import os
import math
import scipy.stats as st

# define the database connection string
DB_HOST = 'localhost' 
DB_PORT = 8086
DB_NAME = 'meteorology'
DB_USER = 'root'
DB_PASSWORD = 'root'

station_name = "mythenquai"

In [61]:
def get_data_for_wind_warnings(station_name):
    """
    Get wind data and calculate possibility of storm warning
    :params station_name: station to query data from
    :returns: possibility of wind and storm warning probability
    """
    
    # Create models from data
    def best_fit_distribution(data, bins=200):
        
        global best_distribution
        
        # """Model data by finding best fit distribution to data"""
        # Get histogram of original data
        y, x = np.histogram(data.values, bins=bins, density=True)
        x = (x + np.roll(x, -1))[:-1] / 2.0

        # Distributions to check
        DISTRIBUTIONS = [
            st.dgamma, st.expon, st.exponnorm, st.gamma, st.gengamma, st.invgamma, st.invgauss,
            st.invweibull, st.johnsonsb, st.laplace, st.logistic, st.loggamma, st.loglaplace,
            st.lognorm, st.norm, st.weibull_min, st.weibull_max
        ]
        
        # Best holders
        best_distribution = st.norm
        best_params = (0.0, 1.0)
        best_sse = np.inf

        # Estimate distribution parameters from data
        for distribution in DISTRIBUTIONS:

            # Try to fit the distribution
            try:
                # Ignore warnings from data that can't be fit
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore')

                    # fit dist to data
                    params = distribution.fit(data)

                    # Separate parts of parameters
                    arg = params[:-2]
                    loc = params[-2]
                    scale = params[-1]

                    # Calculate fitted PDF and error with fit in distribution
                    pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)
                    sse = np.sum(np.power(y - pdf, 2.0))

                    # identify if this distribution is better
                    if best_sse > sse > 0:
                        best_distribution = distribution
                        best_params = params
                        best_sse = sse

            except Exception:
                pass

        return (best_distribution.name, best_params)

    def make_pdf(dist, params, size=10000):
        """Generate distributions's Probability Distribution Function """

        # Separate parts of parameters
        arg = params[:-2]
        loc = params[-2]
        scale = params[-1]

        # Get sane start and end points of distribution
        start = dist.ppf(0.01, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.01, loc=loc, scale=scale)
        end = dist.ppf(0.99, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.99, loc=loc, scale=scale)

        # Build PDF and turn into pandas Series
        x = np.linspace(start, end, size)
        y = dist.cdf(x, loc=loc, scale=scale, *arg)
        pdf = pd.Series(y, x)

        return pdf

  # Load data from statsmodels datasets
    #data = data.set_index('timestamp_cet')
    #data = data.wind_gust_max_10min
    
    client = influxdb.DataFrameClient(DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME)
    
    query = """SELECT
                wind_gust_max_10min
                FROM "meteorology"."autogen"."{}"
                WHERE time > now() - 6h""".format(station_name)
    
    query = """SELECT (wind_gust_max_10min) 
    FROM "meteorology"."autogen"."mythenquai" 
    WHERE time > '2019-03-10' AND time < '2019-03-11'"""

    df = pd.DataFrame(client.query(query)[station_name])

    # Find best fit distribution
    best_fit_name, best_fit_params = best_fit_distribution(df, 10)
    best_dist = getattr(st, best_fit_name)

    # Make PDF with best params
    pdf = make_pdf(best_dist, best_fit_params)
    pdf = pdf.to_frame()
    
    print(pdf)

    #try:
    values_above_12 = pdf[pdf.index > 12.7]
    print(values_above_12)
    val = values_above_12.iloc[0][0]
    prob_strong_wind = (1 - val) * 100
    #except:
    #   prob_strong_wind = 0

    #try:
    values_above_12 = pdf[pdf.index > 16.9]
    val = values_above_12.iloc[0][0]
    prob_sturm_wind = (1 - val) * 100
    #except:
    #    prob_sturm_wind = 0

    return prob_strong_wind, prob_sturm_wind




warnings = get_data_for_wind_warnings("mythenquai")
print(warnings[0], warnings[1])

                  0
-2.326348  0.010000
-2.325883  0.010012
-2.325417  0.010025
-2.324952  0.010037
-2.324487  0.010050
...             ...
 2.324487  0.989950
 2.324952  0.989963
 2.325417  0.989975
 2.325883  0.989988
 2.326348  0.990000

[10000 rows x 1 columns]
Empty DataFrame
Columns: [0]
Index: []


IndexError: single positional indexer is out-of-bounds