In [1]:
import tensorflow as tf
#from tensorflow import keras
import numpy as np
#import time
import json
import pandas as pd
from statistics import mean
#import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
#from sklearn import preprocessing

In [2]:
file = open(f"shallow_deps/def_type.json", "r", encoding='utf-8')
js = file.read()
def_type = json.loads(js)
file.close()

types = [row for row in def_type["types"]]
terms = [row for row in def_type["defn"]]

In [3]:
if tf.config.list_physical_devices('GPU'):
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
    tf.config.experimental.set_virtual_device_configuration(physical_devices[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])

In [None]:
model = Sequential()
model.add(Dense(len(terms), input_shape = (len(types),), activation = "softplus"))
model.compile(Adam(learning_rate = 0.005), "categorical_crossentropy", metrics = ["accuracy"])
model.summary()

<keras.engine.sequential.Sequential object at 0x000002141C542520>


In [None]:
x_tests = []
y_tests = []

In [None]:
epochs = 2
for j in range(epochs):
    for i in range(31):

        print("Reading file :", i)
        file = open(f"shallow_deps/split{0}.json", "r", encoding='utf-8')
        js = file.read()
        data = json.loads(js)[:1000]
        file.close()

        #curr_time = time.time()
        input_list = []
        output_list = []

        print("Creating input and output files", i)
        len_types = len(types)
        len_terms = len(terms)
        for proof in data:
            input_def = [0]*len_types
            output_def = [0]*len_terms

            typs = set(proof['type'])
            for typ in typs:
                input_def[types.index(typ)] += proof['type'].count(typ)
            input_list.append(input_def)

            trms = set(proof['defn'])
            for trm in trms:
                output_def[terms.index(trm)] += proof['type'].count(trm)
            output_list.append(output_def)

        input_list = np.asarray(input_list)
        output_list = np.asarray(output_list)
        #print('time :', time.time() - curr_time)

        print("Splitting into train and test")
        no_test = 100
        x_train, x_test, y_train, y_test = train_test_split(input_list, output_list, test_size=no_test/len(data))
        x_tests.extend(x_test)
        y_tests.extend(y_test)

        print("Model fitting")
        model.fit(x_train, y_train, epochs = 1)

In [None]:
model.save('Model')

In [7]:
x_tests = np.array(x_tests)
y_tests = np.array(y_tests)

In [8]:
model.evaluate(x_tests, y_tests)



[27.27548599243164, 0.05999999865889549]

In [9]:
def get_score(y_tests, y_preds):
    
    df = pd.DataFrame(list(zip(y_tests, y_preds)), columns =['y_tests', 'y_preds'])
    df = df.sort_values(by="y_preds", ascending=False)
    
    df['y_pred'] = range(len(df))
    sum_val = df['y_tests'].sum()
    
    if sum_val:
        return (df['y_tests']*df['y_pred']).sum()/sum_val
    else:
        return 0

In [10]:
y_preds = model.predict(x_tests)



In [11]:
def avg_score():
    
    print('\n',f"Calculating average score for {len(y_preds)} elements.")
    
    list_score = []
    
    for i in range(len(y_preds)):
        score = get_score(y_tests[i], y_preds[i])
        list_score.append(score)
    
    avg_per = mean(list_score)
    print(f"Average rank elements is {round(avg_per)} out of {len(terms)} total terms.")
    
    print('Ratio :', avg_per/len(terms))
    return avg_per

In [12]:
avg_score()


 Calculating average score for 100 elements.
Average rank elements is 7624 out of 89714 total terms.
Ratio : 0.08497624852045485


7623.559159764087

In [13]:
cut_off = 10

for i in range(len(y_tests)):
    df = pd.DataFrame(list(zip(terms, y_tests[i], y_preds[i])), columns =['terms', 'y_tests_count', 'y_preds_index'])
    df = df.sort_values(by="y_preds_index", ascending=False)

    df['y_preds_index'] = range(len(df))
    sum_val = df['y_tests_count'].sum()

    df.index = range(len(df))

    if sum_val:
        avg = (df['y_tests_count']*df['y_preds_index']).sum()/(sum_val)
    else:
        avg = 0
        
    if avg <= cut_off:
        print('\n -  -  -  \n')
        if str(avg) == "0.0":
            print('Average', avg)
            display(df[:1])
        elif avg > 0:
            print('Average', avg)
            display(df[:df['y_tests_count'].iloc[::-1].ne(0).idxmax()+1])
        else:
            print('Average', avg, '(no elements in defn)')


 -  -  -  

Average 0.0


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Nat,1,0



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 8.0


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Eq,0,0
1,Zero.zero,0,1
2,Real,0,2
3,Nat,0,3
4,Ennreal,0,4
5,Real.measureSpace,0,5
6,HMul.hMul,0,6
7,Real.preorder,0,7
8,HAdd.hAdd,2,8



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 1.0


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Eq,0,0
1,coeFn,2,1



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0.0


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Nat,5,0



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 2.0


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Nat,0,0
1,Eq,0,1
2,One.one,1,2



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  - 

Unnamed: 0,terms,y_tests_count,y_preds_index
0,coeFn,0,0
1,Eq,0,1
2,HPow.hPow,0,2
3,Nat,4,3
4,Exists,0,4
5,Real,4,5



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 3.142857142857143


Unnamed: 0,terms,y_tests_count,y_preds_index
0,Nat,2,0
1,Eq,2,1
2,Zero.zero,1,2
3,HAdd.hAdd,0,3
4,HMul.hMul,0,4
5,coeFn,0,5
6,One.one,1,6
7,Exists,0,7
8,And,0,8
9,Iff,0,9



 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)

 -  -  -  

Average 0 (no elements in defn)
