In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

![](https://media-exp3.licdn.com/dms/image/C4E1BAQEuTsJLViLkQw/company-background_10000/0/1587631889353?e=2159024400&v=beta&t=7N65emCWIoGvjrTaXUbHwLSaNEySNW0rAiEADaGOutI)br.linkedin.com

<center style="font-family:verdana;"><h1 style="font-size:200%; padding: 20px; background: #001f3f;"><i><b style="color:white;">Feyn and QLattice</b></i></h1></center>

"Feyn is a Python module for interacting with the QLattice."

"The QLattice is a machine learning technology that helps you search through an infinite list of potential mathematical models to solve your problem."

"It's a quantum-inspired simulation where you make decisions when exploring the data, giving you a good understanding of the relationships in your data and closing the loop between scientific inquiry and data science."

https://docs.abzu.ai/

https://docs.abzu.ai/docs/guides/getting_started/community.html

#Code by Casper Wilstrup https://www.kaggle.com/wilstrup/use-qlattice-to-predict-rainy-days-in-australia/notebook

In [None]:
!pip install feyn

In [None]:
import feyn

In [None]:
# Read the Istanbul weather CSV from the Kaggle input directory into df,
# then preview its first rows.
df = pd.read_csv('../input/weather-istanbul-data-20092019/Istanbul Weather Data.csv')
df.head()

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Object-dtype columns that contain at least one missing value.
categorical_nan = [
    col
    for col in df.columns
    if df[col].dtypes == 'O' and df[col].isna().any()
]
print(categorical_nan)

In [None]:
# Put the literal string 'None' in place of every missing entry of the
# columns collected in categorical_nan; other columns are left untouched.
df = df.fillna({col: 'None' for col in categorical_nan})

In [None]:
# Re-count missing values in the columns listed in categorical_nan to confirm
# the fill worked (every count is expected to be zero).
df[categorical_nan].isna().sum()

In [None]:
# Derive the ISO week number from the DateTime column, then drop the raw column.
# NOTE(review): parsing relies on pandas' automatic format detection; if the
# dates are written day-first, check that they are read correctly - TODO confirm.
# This cell is not re-runnable: a second run fails because DateTime is gone.
timestamps = pd.to_datetime(df["DateTime"])
df["week"] = timestamps.dt.isocalendar().week.astype(int)
df = df.drop(columns=["DateTime"])

In [None]:
#Remove colon to avoid errors ValueError: invalid literal for int() with base 10

#Code by Sohom Majumder https://www.kaggle.com/sohommajumder21/statistical-tests-for-understanding-life-value/comments

# Strip the colons from each clock-time string and parse what is left as a number.
# Anything that is not purely numeric afterwards (e.g. a 'None' placeholder for a
# missing entry) becomes NaN because of errors='coerce'.
# The result is written back to the SAME column name (note the capital S in
# 'SunSet' / 'MoonSet') so that no stray 'Sunset' / 'Moonset' duplicates are
# created and all four columns end up numeric.
for time_col in ['SunRise', 'SunSet', 'MoonRise', 'MoonSet']:
    digits_only = df[time_col].str.replace(':', '', regex=False)
    df[time_col] = pd.to_numeric(digits_only, errors='coerce')

In [None]:
#Code by Tejashvi14 https://www.kaggle.com/tejashvi14/casualty-analysis/notebook

#After that snippet the float(Rain) and the other objects become integers.

# Cast the target and the four time columns to integers.
#
# The time columns may still contain text or NaN at this point (NaN is what
# errors='coerce' produces for unparseable entries), and int() cannot represent
# either. They are therefore coerced to numbers first, and missing entries are
# replaced by the sentinel MISSING_TIME before the integer cast.
# NOTE(review): -1 is an arbitrary "no value" marker - confirm it suits the model.
MISSING_TIME = -1

df["Rain"] = df["Rain"].astype("int64")  # truncates the fractional part, like int()
for time_col in ["SunRise", "SunSet", "MoonRise", "MoonSet"]:
    as_number = pd.to_numeric(df[time_col], errors="coerce")
    df[time_col] = as_number.fillna(MISSING_TIME).astype("int64")

In [None]:
#df.SunRise = df.SunRise.fillna("-1:0:0").apply(lambda s: s.split(":")[0]).astype(int)
#df.SunSet = df.SunSet.fillna("-1:0:0").apply(lambda s: s.split(":")[0]).astype(int)
#df.MoonRise = df.MoonRise.fillna("-1:0:0").apply(lambda s: s.split(":")[0]).astype(int)
#df.MoonSet = df.MoonSet.fillna("-1:0:0").apply(lambda s: s.split(":")[0]).astype(int)

#ValueError: invalid literal for int() with base 10: 'None'

In [None]:
# Inspect the dtype of every column after the conversions performed so far.
df.dtypes

In [None]:
# Semantic types handed to the QLattice: each listed column is marked as
# categorical ("cat"). SunRise and SunSet are intentionally not listed.
stypes = {column: "cat" for column in ("Condition", "MoonRise", "MoonSet")}

In [None]:
#Code by Bizen https://www.kaggle.com/hiro5299834/tps-apr-2021-deebtables/notebook

# Column to predict. It must not be float: a float target resulted in
# "ValueError: Unknown label type: 'continuous'", even after the encoding.
TARGET = 'Rain'

# Columns that are passed through the label encoder.
label_cols = ['Condition', 'MoonRise', 'MoonSet']

# Columns that are used as-is.
numerical_cols = [
    'SunRise', 'SunSet',
    'MaxTemp', 'MinTemp',
    'AvgWind', 'AvgHumidity', 'AvgPressure',
    'week',
]

In [None]:
#Code by Bizen https://www.kaggle.com/hiro5299834/tps-apr-2021-deebtables/notebook

from sklearn.preprocessing import LabelEncoder

def label_encoder(c):
    """Encode the values of one column as integer labels.

    A fresh ``LabelEncoder`` is fitted on ``c`` and immediately used to
    transform it, so the label codes are specific to this single call.

    Parameters
    ----------
    c : array-like
        The values of one column (a pandas Series when called through
        ``DataFrame.apply``).

    Returns
    -------
    The result of ``LabelEncoder.fit_transform(c)``.
    """
    return LabelEncoder().fit_transform(c)

# Label-encode the columns in label_cols one at a time, and pick out the
# numerical columns and the target unchanged.
label_encoded_df = df.loc[:, label_cols].apply(label_encoder, axis=0)
numerical_df = df.loc[:, numerical_cols]
target_df = df.loc[:, TARGET]

# Rebuild df side by side from the three parts. Only these columns survive;
# every other column of the previous df is discarded here.
df = pd.concat([label_encoded_df, numerical_df, target_df], axis="columns")

#Check if the Label Encoding worked

In [None]:
# Preview the re-assembled frame to check the label-encoded columns.
df.head()

In [None]:
# Split df into train and test parts with a 1:1 ratio; random_state is fixed at 42
# so that the split is the same on every run.
train, test = feyn.tools.split(df, ratio=(1,1), random_state=42)

## Community QLattice

We are now ready to connect to the QLattice. The feyn module will look in your local configuration file to see if we have a commercial QLattice. If not, it will allocate a community QLattice for us on the Abzu compute cluster.

In [None]:
# Obtain a QLattice handle; the model searches further down are run through ql.
ql = feyn.connect_qlattice()

## Reproducibility

The QLattice will be reset when we get it, but to ensure that we get exactly the same result every time we run the notebook, we need to seed the QLattice. This is done with the `reset` method.

In [None]:
# Reset the QLattice with a fixed seed (42) so repeated runs of the notebook
# give the same result.
ql.reset(random_seed=42)

## Search for the best model

We are now ready to instruct the QLattice to search for the best mathematical model to explain the data. Here we use the high-level convenience function that does everything with sensible defaults: https://docs.abzu.ai/docs/guides/essentials/auto_run.html.

For more detailed control, we could use the primitives: https://docs.abzu.ai/docs/guides/primitives/using_primitives.html

Notice that the stypes dictionary we created earlier gets passed to the QLattice here.

NOTE: This will take several minutes to complete. It involves work done remotely on the QLattice machine as well as in the local notebook. The part that runs locally slows things down because of the limited CPU resources on Kaggle. Running the same on my machine locally only takes 20 seconds!

#If the target variable is continuous rather than boolean, you should use a regression model rather than a classification model. Just change the auto_run line to:

models = ql.auto_run(train, output_name="Rain", kind="regression", stypes = stypes)

In [None]:
# Search for regression models that predict "Rain" from the training split,
# passing the semantic types declared in stypes. The result is indexable;
# models[0] is the model inspected in the following cells
# (presumably the best one found - confirm against the feyn docs).
models = ql.auto_run(train, output_name="Rain", kind="regression", stypes = stypes)

In [None]:
# Try to draw a ROC curve for the first model on each split.
# The note following this cell records that the call raised
# "ValueError: multiclass format is not supported". The error is caught and
# printed per split so that the demonstration stays visible without aborting
# a full "Run All" of the notebook.
for split_name, split in (("train", train), ("test", test)):
    try:
        models[0].plot_roc_curve(split)
    except ValueError as err:
        print(f"ROC curve unavailable for the {split_name} split: {err}")

Above I got ValueError: multiclass format is not supported

#Below: Let's try models without specifying the kind (Regression or classification).

In [None]:
# Repeat the search without passing kind, so feyn's default model kind is used.
# NOTE: this rebinds models, replacing the result of the kind="regression" run.
models = ql.auto_run(train, output_name="Rain", stypes = stypes)

In [None]:
# Confusion matrix of the first model on the test split, with threshold 0.3.
models[0].plot_confusion_matrix(test, threshold=.3)

In [None]:
# Same confusion matrix with threshold 0.5, for comparison with the 0.3 setting.
models[0].plot_confusion_matrix(test, threshold=.5)

In [None]:
# Show the first model as a symbolic expression.
# NOTE(review): the argument 2 is presumably the number of digits shown for the
# constants - confirm against the feyn docs.
models[0].sympify(2)

In [None]:
# Plot the first model using the test split.
models[0].plot(test)