# In [1]:
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import LogisticRegressionWithSGD

def parseData(line):
  """Parse one comma-separated record into a LabeledPoint.

  The line is split on ',' and every field is converted to int. The last
  field is the class label and all preceding fields are the features.
  A label of 1 is kept as 1; any other value (for example -1) becomes 0,
  so the resulting label is always binary.

  line: a single CSV line whose fields are all integer literals.
  returns: LabeledPoint(label, features).
  raises: ValueError if any field cannot be converted to int.
  """

  # Build a real list: on Python 3 map() returns a one-shot iterator that
  # supports neither negative indexing nor slicing.
  values = [int(field) for field in line.split(',')]
  label = 1 if values[-1] == 1 else 0
  return LabeledPoint(label, values[:-1])


def getAccuracy(split_weights, parsedData):
  """Train on one random split of the data and measure accuracy on the other.

  The data is split with randomSplit using a fixed seed, a logistic
  regression model is trained with SGD on the first split, and the model is
  scored on the second split.

  split_weights: [train_weight, test_weight] passed to randomSplit.
  parsedData: RDD of LabeledPoint records.
  returns: fraction of test records whose predicted label equals the true
    label, as a float; 0.0 when the test split is empty.
  """

  seed = 42  # fixed seed so repeated runs produce the same split
  parsedTrainData, parsedTestData = parsedData.randomSplit(split_weights, seed)

  # Both splits are read more than once below (iterative training, two
  # counting passes), so keep them cached for the duration of this call.
  parsedTrainData.cache()
  parsedTestData.cache()
  try:
    model = LogisticRegressionWithSGD.train(parsedTrainData)

    total = parsedTestData.count()
    if total == 0:
      # Nothing to score; avoid dividing by zero.
      return 0.0

    # Count the hits directly; misses are implied by the total, so the
    # predictions are computed in a single pass instead of three.
    correct = parsedTestData.filter(
        lambda lp: lp.label == model.predict(lp.features)).count()
    return float(correct) / total
  finally:
    # The splits are local to this call; release their cached blocks so
    # repeated calls do not accumulate cached RDDs.
    parsedTrainData.unpersist()
    parsedTestData.unpersist()



"""Upload the CSV dataset file to Databricks using the Tables tab in the left panel:
click Tables ==> Create Table ==> 'Drop file or click here to upload' ==> select the CSV file ==> Open.
When the upload has finished, click 'Preview Table', give the table a name, and choose CSV as the file type and ',' as the delimiter.
Tick 'First row is header' and give the columns appropriate names.
Then click 'Create Table'.
"""

# Read the raw CSV lines, drop the header row (the line that starts with the
# first column name), parse each remaining record into a LabeledPoint, and
# cache the result because it is split again for every weight pair below.
parsedData = (
    sc.textFile('dbfs:/FileStore/tables/1gcy7oeb1486424914458/data.csv')
    .filter(lambda row: not row.startswith('having_IP_Address'))
    .map(parseData)
    .cache()
)

# [train, test] weight pairs handed to randomSplit, from 10/90 up to 90/10.
weights = [
    [.1, .9],
    [.2, .8],
    [.3, .7],
    [.4, .6],
    [.5, .5],
    [.6, .4],
    [.7, .3],
    [.8, .2],
    [.9, .1],
]

"""Passing the results to display() lets Databricks inspect the data and configure a plot for us.
The plot type and its settings can be changed manually afterwards from the plot options.
"""

# Evaluate the model once per weight pair and record each accuracy next to
# the training weight (the first element of the pair) it was obtained with.
accuracyByTrainWeight = []
for weight in weights:
  accuracyByTrainWeight.append((weight[0], getAccuracy(weight, parsedData)))
display(accuracyByTrainWeight)