<a href="https://colab.research.google.com/github/nfinan/NanomineML-PredictTg/blob/main/NM_W%26B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import json
import requests
import pandas as pd

Test
<img src="https://i.imgur.com/tOdqBxM.png" width="400" alt="Nanomine" />

<div><img /></div>

### Simplified SPARQL submission object

In [None]:
class nm_rdf_query:
    """Submit a SPARQL query to a SPARQL endpoint and tabulate the result.

    Attributes set by the methods (none exist until the method has run):
        results_df: DataFrame built from the ``results.bindings`` list of the
            JSON response, flattened with ``_`` as separator (so a SPARQL
            variable ``?matrix`` becomes columns such as ``matrix_value``).
            After ``standard_filtered_query`` it holds the filtered rows.
        filtered_data: ``results_df`` without thermoset/elastomer matrices.
        unfiltered: the raw frame kept by ``standard_filtered_query``.
    """

    # (category, matrixtype) pairs used to classify matrix labels.
    MATRIX_CATEGORIES = (
        ("Thermoplastic", "Polystyrene"),
        ("Thermoplastic", "Poly(methyl methacrylate)"),
        ("Thermoplastic", "Nylon 6(3)T"),
        ("Thermoplastic", "Polybenzimidazole"),
        ("Thermoplastic", "Poly(vinyl alcohol)"),
        ("Thermoplastic", "Poly(vinyl butyral)"),
        ("Thermoplastic", "Poly(vinyl chloride)"),
        ("Thermoplastic", "Poly(vinylidene fluoride)"),
        ("Thermoplastic", "Poly(ethylene terephthalate)"),
        ("Thermoplastic", "Polypropylene"),
        ("Thermoplastic", "Polyurethane"),
        ("Thermoplastic", "Poly(ethyl methacrylate)"),
        ("Thermoplastic", "Poly(2-vinyl pyridine)"),
        ("Thermoplastic", "Poly(acrylonitrile)"),
        ("Thermoplastic", "Poly(styrene-co-acrylonitrile)"),
        ("Thermoplastic", "Poly(bisphenol A carbonate)"),
        ("Thermoplastic", "Poly(ethylene-vinyl acetate)"),
        ("Thermoplastic", "Polyamide-imide"),
        ("Thermoplastic", "Polyimide"),
        ("Thermoplastic", "Polylactic acid"),
        ("Thermoplastic", "Polyphenylsulfone"),
        ("Thermoset", "DGEBA Epoxy Resin"),
        ("Thermoset", "DGEBF Epoxy Resin"),
        ("Thermoset", "Cycloaliphatic Epoxy Resin"),
        ("Thermoset", "Bisphenol E cyanate ester resin"),
        ("Thermoset", "Bisphenol-A-epoxy vinyl ester resin"),
        ("Elastomer", "Poly(dimethyl siloxane)"),
    )

    # Categories whose matrices are removed by ``filter_thermosets``.
    EXCLUDED_CATEGORIES = ("Thermoset", "Elastomer")

    def __init__(self, query, url='https://materialsmine.org/wi/sparql', timeout=120):
        """Store the query text and endpoint; nothing is sent yet.

        Args:
            query: SPARQL query string.
            url: SPARQL endpoint that accepts ``format`` and ``query``
                as GET parameters.
            timeout: seconds passed to ``requests.get``; ``None`` waits
                indefinitely (the behaviour before this parameter existed).
        """
        self.query = query
        self.url = url
        self.timeout = timeout

    def submit(self):
        """Send the query and store the bindings table in ``self.results_df``.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
                (instead of failing later while decoding a non-JSON body).
            KeyError: if the JSON body has no ``results.bindings`` entry.
        """
        response = requests.get(
            self.url,
            params={'format': 'json', 'query': self.query},
            timeout=self.timeout,
        )
        response.raise_for_status()

        # One dict per result row, each mapping a variable name to a
        # {"type": ..., "value": ...} record.
        bindings = response.json()['results']['bindings']
        self.results_df = pd.json_normalize(bindings, sep='_')

    def filter_thermosets(self, column='matrix_value'):
        """Drop rows whose matrix is a listed thermoset or elastomer.

        The comparison is case-insensitive. Rows whose matrix label is missing
        or not in ``MATRIX_CATEGORIES`` are kept. The result is stored in
        ``self.filtered_data`` as an independent copy.

        Args:
            column: name of the column in ``self.results_df`` that holds the
                matrix label.

        Raises:
            KeyError: if ``self.results_df`` has rows but no such column.
        """
        results_df = self.results_df

        # A query with zero result rows yields a frame without columns;
        # there is nothing to filter in that case.
        if results_df.empty:
            self.filtered_data = results_df.copy()
            return

        if column not in results_df.columns:
            raise KeyError(
                f"results_df has no column {column!r}; "
                f"available columns: {list(results_df.columns)}"
            )

        excluded = {category.casefold() for category in self.EXCLUDED_CATEGORIES}
        blacklist = [
            matrixtype.casefold()
            for category, matrixtype in self.MATRIX_CATEGORIES
            if category.casefold() in excluded
        ]

        is_blacklisted = results_df[column].str.casefold().isin(blacklist)
        self.filtered_data = results_df[~is_blacklisted].copy()

    def standard_filtered_query(self):
        """Submit the query, filter it, and return the filtered frame.

        The raw result is kept in ``self.unfiltered``; ``self.results_df`` is
        replaced by the filtered frame.
        """
        self.submit()
        self.unfiltered = self.results_df
        self.filter_thermosets()
        self.results_df = self.filtered_data
        return self.filtered_data


### QUERIES

QUERY 1: Silica dTg

In [None]:
# Query 1: polymer nanocomposite samples whose filler is labelled
# "Silicon dioxide" and has a volume fraction, together with the sample's
# glass transition temperature (unit labelled "Celsius"), the Tg of its control
# sample, and their difference (?deltaTg = ?Tg - ?controlTg).
# The surface-treatment label is optional. Matrix and surface-treatment labels
# are lower-cased and returned as ?matrix and ?PST; after flattening, ?matrix
# becomes the `matrix_value` column that nm_rdf_query.filter_thermosets reads.
query1 = """
PREFIX nm: <http://nanomine.org/ns/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?VolFrac ?Tg ?controlTg ?deltaTg (LCASE(?SurfaceTreatmentType) AS ?PST) (LCASE(?MatrixType) AS ?matrix) ?sample ?doi WHERE {
  ?sample a nm:PolymerNanocomposite;
          sio:hasComponentPart ?FillerPart ,
                               ?MatrixPart .
  
  ?doi sio:hasPart ?sample .

  ?FillerPart sio:hasRole [ a nm:Filler ] ;
              a [ rdfs:label "Silicon dioxide" ] ;
              sio:hasAttribute [ a nm:VolumeFraction ;
                                 sio:hasValue ?VolFrac ] .
  
  ?MatrixPart sio:hasRole [ a nm:Matrix ] ;
              a [ rdfs:label ?MatrixType ] .
  
  ?sample sio:hasAttribute [ a nm:GlassTransitionTemperature ;
                             sio:hasValue ?Tg ;
                             sio:hasUnit [ rdfs:label "Celsius" ] ] .
  

  ?controlsample sio:hasRole [ a sio:ControlRole ;
                               sio:inRelationTo ?sample ] ;
                 sio:hasAttribute [ a nm:GlassTransitionTemperature ;
                                    sio:hasValue ?controlTg ;
                                    sio:hasUnit [ rdfs:label "Celsius" ] ] .
  
  BIND ( ?Tg - ?controlTg AS ?deltaTg )
  
  OPTIONAL {
    ?FillerPart sio:isSurroundedBy [ sio:hasRole [ a nm:SurfaceTreatment ] ;
                                     a [ rdfs:label ?SurfaceTreatmentType ] ] .
  }                   
}
"""

QUERY 2: all of NM

In [None]:
# Query 2: every ?sample typed nm:PolymerNanocomposite (URIs only, no attributes).
query2 = """
PREFIX nm: <http://nanomine.org/ns/>

SELECT * WHERE {
?sample a nm:PolymerNanocomposite .
}
"""

QUERY 3: Tg values with reported uncertainty

In [None]:
# Query 3: samples that report a glass transition temperature (unit
# om-2 degreeCelsius) together with an uncertainty value, plus the matrix label
# and the filler label with its mass fraction.
# NOTE: the matrix variable is ?Matrix (capital M), so the flattened column is
# `Matrix_value`, not the `matrix_value` column that filter_thermosets expects.
query3 = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX nm: <http://nanomine.org/ns/>
SELECT DISTINCT ?sample ?MassFraction ?Filler ?Matrix ?Tg ?uncertaintyTg  WHERE {
  ?sample a nm:PolymerNanocomposite ;
          sio:hasAttribute ?attr ;
          sio:hasComponentPart [ sio:hasRole [ a nm:Matrix ] ;
                                 a [ rdfs:label ?Matrix ] ] ,
                               [ sio:hasRole [ a nm:Filler ] ;
                                 a [ rdfs:label ?Filler ] ;
                                 sio:hasAttribute [ a nm:MassFraction ;
                                                    sio:hasValue ?MassFraction ] ] .
  ?attr a nm:GlassTransitionTemperature ; 
        sio:hasValue ?Tg ;
        sio:hasUnit <http://www.ontology-of-units-of-measure.org/resource/om-2/degreeCelsius> ;
        sio:hasAttribute [ a sio:UncertaintyValue ; 
                           sio:hasValue ?uncertaintyTg ] .
}
"""

### SUBMISSIONS

SUBMISSION 1: dTg silica query (filtered)

In [None]:
# Run query 1 and remove thermoset/elastomer matrices. standard_filtered_query()
# keeps the raw result in Q1.unfiltered and leaves the filtered rows in
# Q1.results_df, which the W&B sweep cells below read.
Q1 = nm_rdf_query(query1)
Q1.standard_filtered_query()
display(Q1.results_df)

Unnamed: 0,VolFrac_datatype,VolFrac_type,VolFrac_value,Tg_datatype,Tg_type,Tg_value,controlTg_datatype,controlTg_type,controlTg_value,deltaTg_datatype,deltaTg_type,deltaTg_value,PST_type,PST_value,matrix_type,matrix_value,sample_type,sample_value,doi_type,doi_value
0,http://www.w3.org/2001/XMLSchema#double,literal,0.0019682431537311456,http://www.w3.org/2001/XMLSchema#double,literal,101.809195,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,0.8091950000000026,literal,polystyrene,literal,polystyrene,uri,http://nanomine.org/sample/l219-s2-bansal-2006,uri,http://dx.doi.org/10.1002/polb.20926
1,http://www.w3.org/2001/XMLSchema#double,literal,0.007945603178241271,http://www.w3.org/2001/XMLSchema#double,literal,102.9954,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,1.9954000000000036,literal,polystyrene,literal,polystyrene,uri,http://nanomine.org/sample/l219-s3-bansal-2006,uri,http://dx.doi.org/10.1002/polb.20926
2,http://www.w3.org/2001/XMLSchema#double,literal,0.020237400272426546,http://www.w3.org/2001/XMLSchema#double,literal,105.17471,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,4.174710000000005,literal,polystyrene,literal,polystyrene,uri,http://nanomine.org/sample/l219-s4-bansal-2006,uri,http://dx.doi.org/10.1002/polb.20926
3,http://www.w3.org/2001/XMLSchema#double,literal,0.0019682431537311456,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,0.0,literal,polystyrene,literal,polystyrene,uri,http://nanomine.org/sample/l219-s6-bansal-2006,uri,http://dx.doi.org/10.1002/polb.20926
4,http://www.w3.org/2001/XMLSchema#double,literal,0.007945603178241271,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,101.0,http://www.w3.org/2001/XMLSchema#double,literal,0.0,literal,polystyrene,literal,polystyrene,uri,http://nanomine.org/sample/l219-s7-bansal-2006,uri,http://dx.doi.org/10.1002/polb.20926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,http://www.w3.org/2001/XMLSchema#double,literal,0.022330097087378643,http://www.w3.org/2001/XMLSchema#double,literal,92.5,http://www.w3.org/2001/XMLSchema#double,literal,74.1,http://www.w3.org/2001/XMLSchema#double,literal,18.400000000000006,,,literal,poly(methyl methacrylate),uri,http://nanomine.org/sample/l283-s4-salami-2012,uri,http://dx.doi.org/10.1007/s10965-011-9793-1
168,http://www.w3.org/2001/XMLSchema#double,literal,0.03163064833005894,http://www.w3.org/2001/XMLSchema#double,literal,92.3,http://www.w3.org/2001/XMLSchema#double,literal,76.9,http://www.w3.org/2001/XMLSchema#double,literal,15.399999999999991,,,literal,poly(methyl methacrylate),uri,http://nanomine.org/sample/l283-s5-salami-2012,uri,http://dx.doi.org/10.1007/s10965-011-9793-1
169,http://www.w3.org/2001/XMLSchema#double,literal,0.03163064833005894,http://www.w3.org/2001/XMLSchema#double,literal,92.3,http://www.w3.org/2001/XMLSchema#double,literal,74.1,http://www.w3.org/2001/XMLSchema#double,literal,18.200000000000003,,,literal,poly(methyl methacrylate),uri,http://nanomine.org/sample/l283-s5-salami-2012,uri,http://dx.doi.org/10.1007/s10965-011-9793-1
170,http://www.w3.org/2001/XMLSchema#double,literal,0.03163064833005894,http://www.w3.org/2001/XMLSchema#double,literal,94.4,http://www.w3.org/2001/XMLSchema#double,literal,74.1,http://www.w3.org/2001/XMLSchema#double,literal,20.30000000000001,,,literal,poly(methyl methacrylate),uri,http://nanomine.org/sample/l283-s5-salami-2012,uri,http://dx.doi.org/10.1007/s10965-011-9793-1


SUBMISSION 2: All of NanoMine

In [None]:
# Run query 2 unfiltered: its result has no matrix column, so only submit() is used.
Q2 = nm_rdf_query(query2)
Q2.submit()
display(Q2.results_df)

Unnamed: 0,sample_type,sample_value
0,uri,http://nanomine.org/sample/e108-s1-huang-2016
1,uri,http://nanomine.org/sample/e108-s2-huang-2016
2,uri,http://nanomine.org/sample/e108-s3-huang-2016
3,uri,http://nanomine.org/sample/e108-s5-huang-2016
4,uri,http://nanomine.org/sample/e108-s7-huang-2016
...,...,...
1981,uri,http://nanomine.org/sample/l390-s5-mortezaei-2011
1982,uri,http://nanomine.org/sample/l391-s1-mortezaei-2011
1983,uri,http://nanomine.org/sample/l391-s2-mortezaei-2011
1984,uri,http://nanomine.org/sample/l391-s3-mortezaei-2011


SUBMISSION 3: Tg values with reported uncertainty

In [None]:
# Run query 3 unfiltered (submit() only; no thermoset/elastomer filtering is applied).
Q3 = nm_rdf_query(query3)
Q3.submit()
display(Q3.results_df)

Unnamed: 0,sample_type,sample_value,MassFraction_datatype,MassFraction_type,MassFraction_value,Filler_type,Filler_value,Matrix_type,Matrix_value,Tg_datatype,Tg_type,Tg_value,uncertaintyTg_datatype,uncertaintyTg_type,uncertaintyTg_value
0,uri,http://nanomine.org/sample/l212-s3-ghanbari-2012,http://www.w3.org/2001/XMLSchema#double,literal,0.04,literal,Montmorillonite,literal,Poly(ethylene terephthalate),http://www.w3.org/2001/XMLSchema#double,literal,75.1,http://www.w3.org/2001/XMLSchema#double,literal,0.3
1,uri,http://nanomine.org/sample/l212-s4-ghanbari-2012,http://www.w3.org/2001/XMLSchema#double,literal,0.04,literal,Montmorillonite,literal,Poly(ethylene terephthalate),http://www.w3.org/2001/XMLSchema#double,literal,76.2,http://www.w3.org/2001/XMLSchema#double,literal,0.1
2,uri,http://nanomine.org/sample/l212-s5-ghanbari-2012,http://www.w3.org/2001/XMLSchema#double,literal,0.04,literal,Montmorillonite,literal,Poly(ethylene terephthalate),http://www.w3.org/2001/XMLSchema#double,literal,77.1,http://www.w3.org/2001/XMLSchema#double,literal,0.4
3,uri,http://nanomine.org/sample/l212-s6-ghanbari-2012,http://www.w3.org/2001/XMLSchema#double,literal,0.04,literal,Montmorillonite,literal,Poly(ethylene terephthalate),http://www.w3.org/2001/XMLSchema#double,literal,76.0,http://www.w3.org/2001/XMLSchema#double,literal,0.1
4,uri,http://nanomine.org/sample/l174-s3-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.01,literal,Graphite,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,112.0,http://www.w3.org/2001/XMLSchema#double,literal,2.2
5,uri,http://nanomine.org/sample/l179-s2-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.01,literal,Graphite,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,137.990552095732,http://www.w3.org/2001/XMLSchema#double,literal,3.978501
6,uri,http://nanomine.org/sample/l179-s3-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.02,literal,Graphite,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,133.943191816033,http://www.w3.org/2001/XMLSchema#double,literal,3.389111
7,uri,http://nanomine.org/sample/l179-s4-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.05,literal,Graphite,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,129.003168469416,http://www.w3.org/2001/XMLSchema#double,literal,0.294695
8,uri,http://nanomine.org/sample/l179-s5-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.01,literal,Graphite,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,112.935578288796,http://www.w3.org/2001/XMLSchema#double,literal,2.136613
9,uri,http://nanomine.org/sample/l174-s2-ramanathan-...,http://www.w3.org/2001/XMLSchema#double,literal,0.01,literal,Single-wall carbon nanotubes,literal,Poly(methyl methacrylate),http://www.w3.org/2001/XMLSchema#double,literal,104.8,http://www.w3.org/2001/XMLSchema#double,literal,2.1


# W and B Sweep
<img src="https://i.imgur.com/gb6B4ig.png" width="400" alt="Weights & Biases" />

<div><img /></div>


Use submission 1 (Q1.results_df)

In [None]:
# Q1.results_df.contains(_value)
# mask = Q1.results_df.columns.str.contains("_value")
# print(mask or Q1.results_df.columns.to_numpy())
# Q1.results_df[mask.tolist()]

# df = Q1.results_df.filter(regex='_value')
# features = ["VolFrac_value", "Tg_value", "controlTg_value", "PST_value", "matrix_value", "sample_value", "doi_value"]
# label = "deltaTg_value"
# X = df[features]
# y = df[label]

In [None]:
%%capture
# Install the Weights & Biases client; %%capture suppresses pip's output.
!pip install wandb

In [None]:
import wandb
from wandb.keras import WandbCallback
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Authenticate with Weights & Biases before creating the sweep.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Keep only the *_value columns and drop the identifier columns.
df = Q1.results_df.filter(regex='_value').drop(["sample_value", "doi_value"], axis=1)

# One-hot encode the categorical columns (with an explicit NaN indicator), then
# cast everything to float: the SPARQL JSON values arrive as strings and the
# dummy columns are integer/bool, so this gives the scaler one numeric dtype.
df = pd.get_dummies(data=df, dummy_na=True, columns=["matrix_value", "PST_value"]).astype(float)
display(df)

# Scale into a NEW frame instead of assigning through df.loc[:, :], which
# writes floats into columns of another dtype in place and overwrites the frame
# that was just displayed.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
display(df_scaled)

# Round-trip check: inverse_transform should reproduce the unscaled values.
df_restored = pd.DataFrame(scaler.inverse_transform(df_scaled), columns=df.columns, index=df.index)
display(df_restored)

# onehot_transformer = Pipeline(steps=[
#                                      ('onehot', pd.get_dummies())
# ])

# minmax_transformer = Pipeline(steps=[
#         ('minmax', MinMaxScaler())])


# preprocessor = ColumnTransformer(
#         remainder='passthrough', #passthough features not listed
#         transformers=[
#             ('std', standard_transformer , ['z']),
#             ('mm', minmax_transformer , ['x','y'])
#         ])

Unnamed: 0,VolFrac_value,Tg_value,controlTg_value,deltaTg_value,matrix_value_nylon 6(3)t,matrix_value_poly(ethylene terephthalate),matrix_value_poly(methyl methacrylate),matrix_value_poly(vinyl alcohol),matrix_value_poly(vinyl butyral),matrix_value_polyamide-imide,matrix_value_polybenzimidazole,matrix_value_polyimide,matrix_value_polystyrene,matrix_value_polyurethane,matrix_value_nan,PST_value_polystyrene,PST_value_nan
0,0.0019682431537311456,101.809195,101.0,0.8091950000000026,0,0,0,0,0,0,0,0,1,0,0,1,0
1,0.007945603178241271,102.9954,101.0,1.9954000000000036,0,0,0,0,0,0,0,0,1,0,0,1,0
2,0.020237400272426546,105.17471,101.0,4.174710000000005,0,0,0,0,0,0,0,0,1,0,0,1,0
3,0.0019682431537311456,101.0,101.0,0.0,0,0,0,0,0,0,0,0,1,0,0,1,0
4,0.007945603178241271,101.0,101.0,0.0,0,0,0,0,0,0,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,0.022330097087378643,92.5,74.1,18.400000000000006,0,0,1,0,0,0,0,0,0,0,0,0,1
168,0.03163064833005894,92.3,76.9,15.399999999999991,0,0,1,0,0,0,0,0,0,0,0,0,1
169,0.03163064833005894,92.3,74.1,18.200000000000003,0,0,1,0,0,0,0,0,0,0,0,0,1
170,0.03163064833005894,94.4,74.1,20.30000000000001,0,0,1,0,0,0,0,0,0,0,0,0,1


Unnamed: 0,VolFrac_value,Tg_value,controlTg_value,deltaTg_value,matrix_value_nylon 6(3)t,matrix_value_poly(ethylene terephthalate),matrix_value_poly(methyl methacrylate),matrix_value_poly(vinyl alcohol),matrix_value_poly(vinyl butyral),matrix_value_polyamide-imide,matrix_value_polybenzimidazole,matrix_value_polyimide,matrix_value_polystyrene,matrix_value_polyurethane,matrix_value_nan,PST_value_polystyrene,PST_value_nan
0,0.002190,0.218614,0.237978,0.374196,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,0.021337,0.221583,0.237978,0.392087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,0.060710,0.227037,0.237978,0.424958,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,0.002190,0.216588,0.237978,0.361991,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,0.021337,0.216588,0.237978,0.361991,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,0.067413,0.195313,0.166053,0.639517,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
168,0.097205,0.194813,0.173540,0.594268,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
169,0.097205,0.194813,0.166053,0.636501,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
170,0.097205,0.200069,0.166053,0.668175,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,VolFrac_value,Tg_value,controlTg_value,deltaTg_value,matrix_value_nylon 6(3)t,matrix_value_poly(ethylene terephthalate),matrix_value_poly(methyl methacrylate),matrix_value_poly(vinyl alcohol),matrix_value_poly(vinyl butyral),matrix_value_polyamide-imide,matrix_value_polybenzimidazole,matrix_value_polyimide,matrix_value_polystyrene,matrix_value_polyurethane,matrix_value_nan,PST_value_polystyrene,PST_value_nan
0,0.001968,101.809195,101.0,0.809195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,0.007946,102.995400,101.0,1.995400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,0.020237,105.174710,101.0,4.174710,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,0.001968,101.000000,101.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,0.007946,101.000000,101.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,0.022330,92.500000,74.1,18.400000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
168,0.031631,92.300000,76.9,15.400000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
169,0.031631,92.300000,74.1,18.200000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
170,0.031631,94.400000,74.1,20.300000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
def train():
    """Train one SVR on the filtered query-1 data and log its scores to W&B.

    Intended to be called by ``wandb.agent``; the hyperparameters ``C``,
    ``kernel`` and ``degree`` are read from ``wandb.config`` (the defaults
    below apply when no sweep supplies them). Reads the global
    ``Q1.results_df`` and logs ``train_score`` and ``test_score``
    (the values returned by ``SVR.score``).
    """
    # Hyperparameters to be tuned, with their initial values.
    config_defaults = {
        'C': 1,
        'kernel': 'rbf',
        'degree': 3
    }

    wandb.init(config=config_defaults, resume=True)
    wandb.mark_preempting()

    # Keep only the *_value columns, drop identifiers, one-hot encode the
    # categorical columns and cast to float (SPARQL JSON values are strings).
    df = Q1.results_df.filter(regex='_value').drop(["sample_value", "doi_value"], axis=1)
    df = pd.get_dummies(data=df, dummy_na=True, columns=["matrix_value", "PST_value"]).astype(float)

    # Split BEFORE scaling so the scaler's min/max come from the training rows
    # only; fitting it on all rows leaks test-set statistics into training.
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    train_scaled = pd.DataFrame(scaler.fit_transform(train_df),
                                columns=df.columns, index=train_df.index)
    test_scaled = pd.DataFrame(scaler.transform(test_df),
                               columns=df.columns, index=test_df.index)

    label = "deltaTg_value"

    # NOTE(review): query1 binds ?deltaTg as ?Tg - ?controlTg, and both
    # Tg_value and controlTg_value remain in the feature set, so the label is
    # an exact function of two features. Confirm this is intended.
    X_train = train_scaled.drop(label, axis=1)
    y_train = train_scaled[label]
    X_test = test_scaled.drop(label, axis=1)
    y_test = test_scaled[label]

    # Support vector regressor configured from the sweep.
    clf = SVR(C=wandb.config.C,
              kernel=wandb.config.kernel,
              degree=wandb.config.degree)
    clf.fit(X_train, y_train)

    wandb.log({'train_score': clf.score(X_train, y_train),
               'test_score': clf.score(X_test, y_test)})

In [None]:
# df = Q1.results_df.filter(regex='_value')
# df2 = pd.get_dummies(data=df, dummy_na=True)
# df2

In [None]:
# Random-search sweep over the SVR hyperparameters read by train(); the sweep
# maximizes the 'test_score' value that train() logs.
# scikit-learn documents `degree` as used only by the 'poly' kernel, so runs
# that differ only in `degree` with another kernel repeat the same model.
sweep_config = dict(
    method='random',
    metric=dict(name='test_score', goal='maximize'),
    parameters=dict(
        C=dict(values=[0.2, 0.5, 1., 1.5, 2]),
        kernel=dict(values=['linear', 'poly', 'rbf', 'sigmoid']),
        degree=dict(values=[2, 3, 5, 7, 9]),
    ),
)

In [None]:
# Register the sweep under the given project; the returned id is passed to wandb.agent.
sweep_id = wandb.sweep(sweep_config, project="my-scikit-sweep-random")

Create sweep with ID: tdfq09os
Sweep URL: https://wandb.ai/nfinan/my-scikit-sweep-random/sweeps/tdfq09os


In [None]:
# Run 10 sweep trials in this kernel; each trial calls train() with a sampled config.
wandb.agent(sweep_id, count=10, function=train)

[34m[1mwandb[0m: Agent Starting Run: x1bgcqfu with config:
[34m[1mwandb[0m: 	C: 2
[34m[1mwandb[0m: 	degree: 7
[34m[1mwandb[0m: 	kernel: linear


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.59775
test_score,0.66936
_runtime,2.0
_timestamp,1622997593.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: xm2oali5 with config:
[34m[1mwandb[0m: 	C: 0.5
[34m[1mwandb[0m: 	degree: 3
[34m[1mwandb[0m: 	kernel: rbf


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.58486
test_score,0.63712
_runtime,2.0
_timestamp,1622997600.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 08b5z7gf with config:
[34m[1mwandb[0m: 	C: 0.2
[34m[1mwandb[0m: 	degree: 2
[34m[1mwandb[0m: 	kernel: rbf


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.52374
test_score,0.54781
_runtime,2.0
_timestamp,1622997607.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 4at4rhne with config:
[34m[1mwandb[0m: 	C: 1
[34m[1mwandb[0m: 	degree: 3
[34m[1mwandb[0m: 	kernel: rbf


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.61551
test_score,0.67907
_runtime,2.0
_timestamp,1622997614.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 8a6rxyu2 with config:
[34m[1mwandb[0m: 	C: 2
[34m[1mwandb[0m: 	degree: 9
[34m[1mwandb[0m: 	kernel: rbf


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.67555
test_score,0.74053
_runtime,2.0
_timestamp,1622997621.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 9z05ilqf with config:
[34m[1mwandb[0m: 	C: 1
[34m[1mwandb[0m: 	degree: 9
[34m[1mwandb[0m: 	kernel: sigmoid


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.39033
test_score,0.53841
_runtime,3.0
_timestamp,1622997629.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: ed9dprza with config:
[34m[1mwandb[0m: 	C: 1.5
[34m[1mwandb[0m: 	degree: 3
[34m[1mwandb[0m: 	kernel: sigmoid


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.26467
test_score,0.48935
_runtime,3.0
_timestamp,1622997636.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: ai5594hc with config:
[34m[1mwandb[0m: 	C: 0.5
[34m[1mwandb[0m: 	degree: 7
[34m[1mwandb[0m: 	kernel: poly


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.76541
test_score,0.74805
_runtime,3.0
_timestamp,1622997643.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: hmov0yce with config:
[34m[1mwandb[0m: 	C: 1
[34m[1mwandb[0m: 	degree: 2
[34m[1mwandb[0m: 	kernel: poly


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.6408
test_score,0.68364
_runtime,2.0
_timestamp,1622997650.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: sknfv5do with config:
[34m[1mwandb[0m: 	C: 1
[34m[1mwandb[0m: 	degree: 3
[34m[1mwandb[0m: 	kernel: sigmoid


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_score,0.39033
test_score,0.53841
_runtime,2.0
_timestamp,1622997657.0
_step,0.0


0,1
train_score,▁
test_score,▁
_runtime,▁
_timestamp,▁
_step,▁
