In [1]:
from pgmpy.sampling import HamiltonianMCDA as HMCda, GradLogPDFGaussian as GLPG, LeapFrog

In [2]:
%matplotlib inline
from pgmpy.factors.distributions import GaussianDistribution as JGD
from pgmpy.sampling import LeapFrog, GradLogPDFGaussian
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from pgmpy.models.BayesianModel import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
student = BayesianModel([('schoolsupport', 'age'), ('famsupport', 'extraclasses'), ('motheredu', 'college'),
                        ('alcweekend', 'goingoutrating'), ( 'freetimerating', 'goingoutrating'), ('alcweekend', 'studytime'),
                        ('goingoutrating', 'failure'), ('grade', 'failure'), ('extraclasses', 'grade'), ('college', 'grade')])

In [22]:
import os
import pandas as pd
import pydot
from IPython.display import SVG
from pycausal.pycausal import pycausal as pycausal
from pycausal import search as search

In [5]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score

In [6]:
def add_to_arr(nested, new_arr):
    for i, x in enumerate(new_arr):
        nested[i].append(x)
    return nested

In [7]:
def norm(arr):
    s = sum(arr)
    return [float(x)/s for x in arr]

In [8]:
def norm_2d(arr):
    num_col = len(arr[0])
    nested = [[] for x in arr]
    for i in range(num_col):
        col = map(lambda x : x[i], arr)
        #print col
        nested = add_to_arr(nested, norm(col))
        #print nested
    return nested  

In [80]:
### EASY GRAPH 
model = BayesianModel([('grade', 'scholarship'),
                       ('grade', 'college'),
                       ('college', 'scholarship'),
                      ('extraclasses', 'grade'), 
                      ('extracurriculars', 'college')])

# no, yes
extraclasses = TabularCPD('extraclasses', 2, [[0.35], [0.65]])
extracurriculars = TabularCPD('extracurriculars', 2, [[0.25], [0.75]])

# F C B A
gradecpd = norm_2d([[0.9, 0.1], 
                    [0.35, 0.3],
                    [0.25, 0.5],
                    [0.1, 0.95]])

grade = TabularCPD('grade', 4, gradecpd, 
                  evidence=['extraclasses'],
                  evidence_card=[2])

# A 0 
collegecpd = norm_2d([[99, 80, 5, 2, 80, 60, 10, 10],
                      [1, 2, 40, 55, 20, 40, 88, 99]])

college = TabularCPD('college', 2, collegecpd, 
                    evidence=['grade', 'extracurriculars'],
                    evidence_card=[4, 2])

se = norm_2d([[1, 1, 1, 1, 90, 60, 2, 2 ],
             [0, 0, 0, 0, 2, 5, 75, 60 ]])

scholarship = TabularCPD('scholarship', 2, se,
                         evidence=["college", 'grade'],
                         evidence_card=[2,4])

model.add_cpds(extraclasses)
model.add_cpds(extracurriculars)
model.add_cpds(grade)
model.add_cpds(scholarship)
model.add_cpds(college)

inf = BayesianModelSampling(model)
samples = inf.forward_sample(size=500, return_type='dataframe')
features = ['extracurriculars', 'college', 'extraclasses', 'scholarship']
X = samples.loc[:,features]
accuracy = cross_val_score(clf, X, samples['grade'], cv=skf)
accuracy

array([ 0.51960784,  0.72      ,  0.75      ,  0.80808081,  0.70707071])

In [81]:
ag = [[2],[3],[3],[3],[3],[0.5],[0.4],[0.1]]
ag_norm = norm_2d(ag)
age = TabularCPD('age', 8, ag_norm)
#schoolsupport = TabularCPD('schoolsupport', 2, [[0.35], [0.65]])

In [13]:
ag = [[2],[3],[3],[3],[3]]
ag_norm = norm_2d(ag)
age = TabularCPD('age', 5, ag_norm)

#extraclasses = TabularCPD('extraclasses', 2, [[0.4], [0.6]])

mj = norm_2d([[0.25], [0.75]])
motherhasjob = TabularCPD('motherhasjob', 2, mj)

fj = norm_2d([[0.15], [0.85]])
fatherhasjob = TabularCPD('fatherhasjob', 2, fj)

# motherhasjob = 1, fatherhasjob = 1
fi = norm_2d([[0.99, 0.75, 0.65, 0.1],
              [0.01, 0.25, 0.35, 0.9]])
             
familyincome = TabularCPD('familyincome', 2, fi, 
                         evidence=['motherhasjob','fatherhasjob'],
                         evidence_card=[2,2])
# g A B C F 
# extraclasses 1 college 0             
extraclasses = TabularCPD('extraclasses', 2, 
                          [[0.8, 0.1], 
                           [0.2, 0.9]], 
                         evidence=['familyincome'], 
                         evidence_card=[2])

familysupport = TabularCPD('familysupport', 2, 
                          [[0.95, 0.05], 
                           [0.05, 0.95]], 
                         evidence=['familyincome'],
                         evidence_card=[2])

extracurriculars = TabularCPD('extracurriculars', 2, [[0.4],[0.6]])

# 0
jnorm = norm_2d([[0.2, 0.9], 
                 [0.8, 0.1]])

job = TabularCPD('job', 2, jnorm, evidence=['extracurriculars'], evidence_card=[2])
st = norm_2d([[0.9, 0.1],
              [0.75, 0.2],
              [0.2, 0.8],
              [0.05, 0.9]])

studytime = TabularCPD('studytime', 4, st,
                       evidence=['job'],
                       evidence_card=[2])

# ec = 1, famincome = 1, studytime=2 F C B A
g = norm_2d([[90, 90, 2, 1,  30, 20, 2, 1,   2, 5, 2, 1,    2, 5, 2, 1],
             [4, 10, 3, 2,   85, 90, 2, 1,   60, 70, 2, 1,  50, 5, 2, 1],
             [2, 1, 85, 25,  5, 5, 90, 5,    5, 10, 70, 3,  3, 50, 10, 5],
             [1, 1, 2, 85,   2, 2, 1, 90,    2, 2, 10, 55,  2, 3, 85, 90]])
grade = TabularCPD('grade', 4, g, 
                     evidence=['extraclasses', 'familysupport', 'studytime'],
                     evidence_card=[2,2,4])

co = norm_2d([[80, 40, 2, 2 ],
     [2, 5, 65, 55 ]])
college = TabularCPD('college', 2, co,
                         evidence=['grade'],
                         evidence_card=[4])

s = norm_2d([[1, 1, 1, 1, 90, 60, 2, 2 ],
             [0, 0, 0, 0, 2, 5, 75, 60 ]])
scholarship = TabularCPD('scholarship', 2, s,
                         evidence=["college", 'grade'],
                         evidence_card=[2,4])


model = BayesianModel([('extracurriculars', 'job'),
                        ('job', 'studytime'),
                       ('motherhasjob', 'familyincome'),
                       ('fatherhasjob', 'familyincome'),
                    ('familyincome', 'familysupport'), 
                    ('familyincome', 'extraclasses'), 
                    ('extraclasses', 'grade'), 
                    ('familysupport', 'grade'),
                    ('studytime', 'grade'),
                   ('grade', 'scholarship'),
                   ('grade', 'college'),
                   ('college', 'scholarship')])
#print model.edges
model.add_node('age')
model.add_cpds(age)
model.add_cpds(fatherhasjob)
model.add_cpds(motherhasjob)
model.add_cpds(familyincome)
model.add_cpds(familysupport)
model.add_cpds(extraclasses)
model.add_cpds(extracurriculars)
model.add_cpds(job)
model.add_cpds(studytime)
model.add_cpds(grade)
model.add_cpds(scholarship)
model.add_cpds(college)
inf = BayesianModelSampling(model)
samples = inf.forward_sample(size=500, return_type='dataframe')

features = ['familysupport', 'studytime', 'extraclasses', 'scholarship']
X = samples.loc[:,features]
index = 5
accuracy = cross_val_score(clf, X, samples['grade'], cv=skf)
accuracy

array([ 0.88118812,  0.85148515,  0.87128713,  0.8989899 ,  0.85714286])

In [163]:
p.stop_vm()

In [89]:
tetrad.run(algoId = 'fges', dfs = samples, scoreId = 'sem-bic', dataType = 'continuous',
penaltyDiscount = 2, maxDegree = -1, faithfulnessAssumed = True, verbose = True)

dot_str = pc.tetradGraphToDot(tetrad.getTetradGraph())
graphs = pydot.graph_from_dot_data(dot_str)
svg_str = graphs[0].create_svg()
SVG(svg_str)

TypeError: unbound method tetradGraphToDot() must be called with pycausal instance as first argument (got JWrapper instance instead)

In [90]:
cols = list(samples)
# move the column to head of list using index, pop and insert
cols.append(cols.pop(cols.index('grade')))
samples = samples.loc[:, cols]
samples.head()

Unnamed: 0,extraclasses,extracurriculars,college,scholarship,grade
0,1,1,0,0,2
1,1,0,0,0,1
2,1,1,1,1,3
3,1,1,1,1,3
4,1,1,1,1,3


In [104]:
samples.to_csv('datafile.csv', index=False)

In [None]:
samples['age'].unique()

In [103]:
samples.head()

Unnamed: 0,extraclasses,extracurriculars,college,scholarship,grade
0,yes,yes,no,no,B
1,yes,no,no,no,C
2,yes,yes,yes,yes,A
3,yes,yes,yes,yes,A
4,yes,yes,yes,yes,A


In [None]:
convert_values(samples, 'motherhasjob', [0,1], ['yes', 'no'])
#convert_values(samples, 'fatherhasjob', [0,1], ['no', 'yes'])
convert_values(samples, 'extracurriculars', [0,1], ['no', 'yes'])
convert_values(samples, 'job', [0,1], ['no', 'yes'])
convert_values(samples, 'studytime', [0,1,2,3], ["less_than_2hr", "2_to_5hr", "5_to_10hr", "greater_than_10"])
convert_values(samples, 'age', [0,1,2,3,4], [14,15,16,17,18])
convert_values(samples, 'familyincome', [0,1], ['below_avg', 'above_avg'])
convert_values(samples, 'extraclasses', [0,1], ['no', 'yes'])
convert_values(samples, 'familysupport', [0,1], ['no', 'yes'])
convert_values(samples, 'grade', [0,1, 2, 3], ["F", "C", "B", "A"])
convert_values(samples, 'college', [0,1], ['no', 'yes'])
convert_values(samples, 'scholarship', [0,1], ['no', 'yes'])

In [10]:
def convert_values(df, feature_name, old_values, new_values):
    for i in range(0, df[feature_name].size):
        for j, value in enumerate(old_values):
            if (df[feature_name][i] == value):
                df[feature_name][i] = new_values[j]

In [102]:
convert_values(samples, 'grade', [0,1, 2, 3], ["F", "C", "B", "A"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [25]:
extraclasses.marginalize(['famsupport'])
extraclasses.get_values()

ValueError: famsupport not in scope.

In [None]:
co = norm_2d([[5, 4, 2, 1, 1], 
              [1, 1, 1, 3, 5]])

In [None]:
motheredu = TabularCPD('motheredu', 5, [[0.15],[0.35],[0.15],[0.15],[0.2]])
college = TabularCPD('college', 2, co,
                    evidence=['motheredu'],
                    evidence_card=[5])

In [None]:
college.marginalize(['motheredu'])
college.get_values()

In [None]:
# alcw = 1, freetime=0
go = norm_2d([[5, 4, 1, 1, 0.4, 0.3, 0.4, 0.3, 0.3, 0.4, 0.4, 0.3, 0.05, 0.1, 0.1, 0.1],
              [3, 3, 2, 2, 0.5, 0.6, 0.6, 0.5, 0.3, 0.4, 0.4, 0.3, 0.1, 0.2, 0.1, 0.1],
              [2, 2, 3, 4, 0.3, 0.4, 0.4, 0.3, 0.5, 0.6, 0.6, 0.5, 0.3, 0.4, 0.4, 0.3],
              [1, 1, 3, 5, 0.3, 0.4, 0.4, 0.3, 0.5, 0.6, 0.6, 0.5, 0.5, 0.6, 0.6, 0.5]])
st = norm_2d([[0.1, 0.21, 0.10, 0.4],
                                        [0.2, 0.25, 0.36, 0.4],
                                        [0.45, 0.44, 0.3, 0.15],
                                        [0.5, 0.330, 0.2, 0.05]])

In [None]:
ft = norm_2d([[0.3], [0.4], [0.3], [0.2]])

In [None]:
alcweekend = TabularCPD('alcweekend', 4, [[0.2, 0.3, 0.3, 0.2]])
freetime = TabularCPD('freetimerating', 4, ft)
goingout = TabularCPD('goingoutrating', 4, go,
                     evidence=['alcweekend', 'freetimerating'],
                     evidence_card=[4,4])
studytime = TabularCPD('studytime', 4, st, 
                      evidence=['alcweekend'],
                      evidence_card=[4])


In [None]:
goingout.marginalize(['alcweekend'])
goingout.get_values()
studytime.marginalize(['alcweekend'])
studytime.get_values()

In [None]:
g = norm_2d([[0.5, 0.6, 0.3, 0.2, 0.4, 0.3, 0.2, 0.2, 0.2, 0.3, 0.15, 0.15, 0.05, 0.05, 0.1, 0.1],
                                [0.3, 0.4, 0.3, 0.3, 0.5, 0.6, 0.6, 0.5, 0.3, 0.4, 0.4, 0.3, 0.1, 0.2, 0.1, 0.1],
                                [0.2, 0.2, 0.25, 0.3, 0.2, 0.2, 0.4, 0.3, 0.45, 0.55, 0.5, 0.5, 0.2, 0.2, 0.4, 0.3],
                                [0.2, 0.2, 0.5, 0.4, 0.2, 0.2, 0.35, 0.35, 0.2, 0.25, 0.4, 0.4, 0.1, 0.2, 0.6, 0.7]])

f = norm_2d([[0.05, 0.1, 0.65, 0.75, 0.05, 0.12, 0.75, 0.6, 0.15, 0.1, 0.15, 0.15, 0.05, 0.1, 0.1, 0.1],
                                    [0.1, 0.2, 0.35, 0.55, 0.1, 0.1, 0.3, 0.5, 0.1, 0.3, 0.4, 0.3, 0.2, 0.3, 0.2, 0.2],
                                    [0.2, 0.4, 0.3, 0.2, 0.3, 0.4, 0.4, 0.3, 0.5, 0.6, 0.6, 0.5, 0.3, 0.4, 0.4, 0.3],
                                    [0.2, 0.4, 0.3, 0.2, 0.3, 0.4, 0.2, 0.3, 0.6, 0.7, 0.2, 0.1, 0.75, 0.55, 0.1, 0.1]])

In [None]:
fe = norm_2d([[0.2166666666666667], [0.25], [0.16666666666666669], [0.16666666666666669], [0.2]])

In [None]:
grade = TabularCPD('grade', 4, g, 
                     evidence=['extraclasses', 'college'],
                     evidence_card=[4,4])

failure = TabularCPD('failure', 4, f, 
                     evidence=['goingoutrating', 'grade'],
                     evidence_card=[4,4])

In [None]:
grade.marginalize(['extraclasses'])
grade.get_values()

In [None]:
fe = norm_2d([[0.2],[0.45],[0.15],[0.35],[0.2]])

In [None]:
fatheredu = TabularCPD('fatheredu', 5, fe)

In [None]:
student.add_cpds(fatheredu)

In [None]:
student.add_cpds(familysupport)
student.add_cpds(extraclasses)
student.add_cpds(motheredu)
student.add_cpds(college)
student.add_cpds(grade)
student.add_cpds(failure)
student.add_cpds(fatheredu)
student.add_cpds(alcweekend)
student.add_cpds(freetime)
student.add_cpds(goingout)
student.add_cpds(studytime)
student.add_cpds(age)
student.add_cpds(schoolsupport)

In [None]:
inference = BayesianModelSampling(student)
samples = inference.forward_sample(size=1000, return_type='dataframe')

In [None]:
samples.head()

In [None]:
dot_str

In [None]:
X = samples.drop(['grade'], axis=1)
X.head()

In [None]:
y = pd.DataFrame(samples['grade'])
y.head()

In [None]:
X.columns

In [14]:
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    GaussianNB()]
index = 1
clf = classifiers[index]
skf = StratifiedKFold(n_splits=5)

In [None]:
features = ['college', 'extraclasses']
X = samples.loc[:,features]
X.head()

In [11]:
index = 0
clf = classifiers[index]
skf = StratifiedKFold(n_splits=5)
#y_pred = cross_val_predict(clf, X, y['grade'], cv=skf)

In [None]:
features = ['extraclasses', 'college']
X = s.loc[:,features]
index = 5
accuracy = cross_val_score(clf, X, s['grade'], cv=skf)
accuracy

In [19]:
features = ['extraclasses', 'college']
X = s.loc[:,features]
index = 1
clf = classifiers[index]
clf.fit(X,s['grade'])
clf.score(X, s['grade'])

AttributeError: 'list' object has no attribute 'loc'

In [108]:
from pgmpy.factors.continuous import ContinuousFactor
from pgmpy.models import LinearGaussianBayesianNetwork

In [15]:
weather = BayesianModel([("WeatherCondition", "Precipitation"), 
  ("Humidity","WeatherCondition"), 
  ("LowTemp", "WeatherCondition"), 
  ("HistoricAvgTemp", "LowTemp"), 
  ("AirPressure", "Humidity"),
  ("HistoricAvgRainfall", "Precipitation")])

# 29.0, 29.5, 30 
AirPressure = TabularCPD('AirPressure', 3, [[0.35], [0.3], [0.35]])

# 50 60 70 80
HistoricAvgTempcpd = norm_2d([[0.2], [0.8], [0.4], [0.2]])
HistoricAvgTemp = TabularCPD('HistoricAvgTemp', 4, [ [0.25], [0.3], [0.2], [0.25]])

# < 1 in, < 2 in, >2 in
HistoricAvgRainfall = TabularCPD('HistoricAvgRainfall', 3, [[0.25], [0.25], [0.5]])

# low, mid, high
humiditycpd = norm_2d([[1, 10, 90], 
                       [10, 90, 10],
                       [90, 10, 1]])

Humidity = TabularCPD('Humidity', 3, humiditycpd, evidence=['AirPressure'], evidence_card=[3])

# 50 60 70 80
LowTempcpd = norm_2d([[90, 10, 1, 1], 
                       [10, 90, 10, 1],
                       [1, 10, 90, 1], 
                      [1, 1, 2, 90]])
LowTemp = TabularCPD('LowTemp', 4, LowTempcpd, evidence=['HistoricAvgTemp'], evidence_card=[4])

# high 40
# sunny, foggy, rainy
WeatherConditioncpd = norm_2d([[70, 80, 80, 99, 10, 10, 10, 10, 2, 1, 1, 1], 
                             [5, 5, 5, 5, 80, 80,10,10, 90, 80, 10, 5],
                             [1, 2, 2, 2, 10, 20, 50, 70, 1, 1, 80, 90]])
WeatherCondition = TabularCPD('WeatherCondition', 3, WeatherConditioncpd, 
                              evidence=['Humidity', 'LowTemp'], evidence_card=[3, 4])

# < 1 in, < 2 in, >2 in
Precipitationcpd = norm_2d([[99, 80, 70, 50, 1, 10, 90, 2, 1],
                           [1, 5, 5, 2, 70, 1, 10, 90,2], 
                           [1, 1, 1, 2, 5, 70, 2, 1, 90]])

Precipitation = TabularCPD('Precipitation', 3, Precipitationcpd, 
                              evidence=['WeatherCondition', 'HistoricAvgRainfall'], evidence_card=[3, 3])

weather.add_cpds(AirPressure, HistoricAvgTemp, Humidity, LowTemp, WeatherCondition, Precipitation, HistoricAvgRainfall)
inf = BayesianModelSampling(weather)
samples = inf.forward_sample(size=200, return_type='dataframe')
features = ['Precipitation', 'Humidity', 'LowTemp', 'HistoricAvgRainfall']
X = samples.loc[:,features]
accuracy = cross_val_score(clf, X, samples['WeatherCondition'], cv=skf)
print accuracy
features = ['Humidity', 'LowTemp']
X = samples.loc[:,features]
accuracy = cross_val_score(clf, X, samples['WeatherCondition'], cv=skf)
print accuracy

[ 0.9047619   0.85        0.9         0.79487179  0.8974359 ]
[ 0.85714286  0.875       0.925       0.74358974  0.82051282]


In [16]:
features = ['Precipitation', 'AirPressure', 'LowTemp', 'HistoricAvgRainfall']
X = samples.loc[:,features]
accuracy = cross_val_score(clf, X, samples['WeatherCondition'], cv=skf)
print accuracy

[ 0.88095238  0.85        0.875       0.76923077  0.87179487]


In [17]:
features = ['Precipitation', 'AirPressure', 'LowTemp', 'HistoricAvgTemp']
X = samples.loc[:,features]
accuracy = cross_val_score(clf, X, samples['WeatherCondition'], cv=skf)
print accuracy

[ 0.88095238  0.85        0.85        0.71794872  0.87179487]


In [26]:
p = pycausal()
p.start_vm()
tetrad = search.tetradrunner()
tetrad.run(algoId = 'fges', dfs = samples, scoreId = 'sem-bic', dataType = 'continuous',penaltyDiscount = 2, maxDegree = -1, faithfulnessAssumed = True, verbose = True)

In [28]:
tetrad.getTetradGraph()

Instance of edu.cmu.tetrad.graph.EdgeListGraphSingleConnections: Graph Nodes:
HistoricAvgTemp,LowTemp,HistoricAvgRainfall,AirPressure,Humidity,WeatherCondition,Precipitation

Graph Edges:
1. Humidity --- AirPressure
2. Humidity --- WeatherCondition
3. LowTemp --- HistoricAvgTemp
4. Precipitation --- HistoricAvgRainfall
5. WeatherCondition --- Precipitation

In [27]:

dot_str = pycausal.tetradGraphToDot(tetrad.getTetradGraph())
graphs = pydot.graph_from_dot_data(dot_str)
svg_str = graphs[0].create_svg()
SVG(svg_str)

TypeError: unbound method tetradGraphToDot() must be called with pycausal instance as first argument (got JWrapper instance instead)

In [160]:
convert_values(samples, 'HistoricAvgTemp', [0,1,2,3], ['50-59F', '60-69F', '70-79F', '80-89F'])

In [162]:
convert_values(samples, 'LowTemp', [0,1,2], ['50-59F', '60-69F', '70-79F', '80-89F'])

In [165]:
convert_values(samples, 'HistoricAvgRainfall', [0,1,2], ['0-1in', '1-2in', '>2in'])

In [167]:
convert_values(samples, 'AirPressure', [0,1,2], ['29-29.5in', '29.5-30in', '>30in'])

In [169]:
convert_values(samples, 'Humidity', [0,1,2], ['low', 'medium', 'high'])

In [170]:
convert_values(samples, 'WeatherCondition', [0,1,2], ['sunny', 'foggy', 'rainy'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [171]:
convert_values(samples, 'Precipitation', [0,1,2], ['0-1in', '1-2in', '>2in'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [173]:
cols = list(samples)
# move the column to head of list using index, pop and insert
cols.append(cols.pop(cols.index('WeatherCondition')))
samples = samples.loc[:, cols]
samples.head()

Unnamed: 0,HistoricAvgTemp,LowTemp,HistoricAvgRainfall,AirPressure,Humidity,Precipitation,WeatherCondition
0,80-89F,70-79F,>2in,29-29.5in,high,>2in,rainy
1,50-59F,50-59F,>2in,>30in,low,0-1in,sunny
2,70-79F,70-79F,1-2in,>30in,medium,1-2in,rainy
3,60-69F,60-69F,1-2in,>30in,low,0-1in,sunny
4,70-79F,70-79F,1-2in,>30in,low,1-2in,foggy


In [174]:
samples.to_csv('datafile_weather.csv', index=False)