-
Notifications
You must be signed in to change notification settings - Fork 0
/
PredictiveClassificationModel.py
65 lines (51 loc) · 2.87 KB
/
PredictiveClassificationModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier
from pyspark.sql import SparkSession
from PredictionAlgorithms.PredictiveUtilities import PredictiveUtilities
# Module-level Spark session, obtained via getOrCreate() when this file is
# imported: application name 'predictive_Analysis', master 'local[*]'.
spark = (
    SparkSession.builder
    .appName('predictive_Analysis')
    .master('local[*]')
    .getOrCreate()
)

# Set the Spark context's log level to ERROR.
spark.sparkContext.setLogLevel('ERROR')
class PredictiveClassificationModel:
    """Fit Spark ML classifiers on a dataset prepared by ``PredictiveUtilities``.

    The constructor runs ``PredictiveUtilities.ETLOnDataset`` once and stores
    the entries of its result on the instance.  The model methods then fit an
    estimator on ``self.trainData`` and return the fitted model.
    """

    def __init__(self, trainDataRatio, dataset_add, feature_colm, label_colm, relation_list,
                 relation, userId, locationAddress, algoName, spark):
        """Store the request parameters and run the ETL step on the dataset.

        Args:
            trainDataRatio: Forwarded to ``ETLOnDataset`` as ``trainDataRatio``.
            dataset_add: Forwarded to ``ETLOnDataset`` as ``datasetAdd``.
            feature_colm: Forwarded to ``ETLOnDataset`` as ``featuresColmList``.
            label_colm: Forwarded to ``ETLOnDataset`` as ``labelColmList``.
            relation_list: Forwarded to ``ETLOnDataset`` as ``relationshipList``.
            relation: Forwarded to ``ETLOnDataset`` as ``relation``.
            userId: Stored on the instance and forwarded to ``ETLOnDataset``.
            locationAddress: Stored on the instance; not used in this class.
            algoName: Stored on the instance; not used in this class.
            spark: Spark session; stored on the instance and forwarded to
                ``ETLOnDataset``.
        """
        self.trainDataRatio = trainDataRatio
        self.datasetAdd = dataset_add
        self.featuresColmList = feature_colm
        self.labelColmList = label_colm
        self.relationshipList = relation_list
        self.relation = relation
        self.userId = userId
        self.locationAddress = locationAddress
        self.algoName = algoName
        self.spark = spark

        # Only for the ETL part of the dataset.
        self.predictiveUtilitiesObj = PredictiveUtilities()
        ETLOnDatasetStats = self.predictiveUtilitiesObj.ETLOnDataset(
            datasetAdd=self.datasetAdd,
            featuresColmList=self.featuresColmList,
            labelColmList=self.labelColmList,
            relationshipList=self.relationshipList,
            relation=self.relation,
            trainDataRatio=self.trainDataRatio,
            spark=spark,
            userId=userId,
        )

        # NOTE(review): the ETL result is read with ``.get``, so it is assumed
        # to be dict-like; any missing key leaves the attribute as ``None``.
        self.dataset = ETLOnDatasetStats.get("dataset")
        self.featuresColm = ETLOnDatasetStats.get("featuresColm")
        self.labelColm = ETLOnDatasetStats.get("labelColm")
        self.trainData = ETLOnDatasetStats.get("trainData")
        self.testData = ETLOnDatasetStats.get("testData")
        self.idNameFeaturesOrdered = ETLOnDatasetStats.get("idNameFeaturesOrdered")

    def classificationModelStat(self, classifier):
        """Return the training summary of a fitted model.

        Args:
            classifier: A fitted model exposing a ``summary`` attribute.

        Returns:
            The value of ``classifier.summary``.
        """
        return classifier.summary

    def logisticRegression(self, maxIter=5, regParam=0.1, elasticNetParam=1.0,
                           threshold=0.3, family="auto"):
        """Fit a logistic regression model on ``self.trainData``.

        The keyword arguments are passed straight to ``LogisticRegression``;
        their defaults are the values this method previously hard-coded.

        Args:
            maxIter: Passed to ``LogisticRegression`` as ``maxIter``.
            regParam: Passed to ``LogisticRegression`` as ``regParam``.
            elasticNetParam: Passed to ``LogisticRegression`` as
                ``elasticNetParam``.
            threshold: Passed to ``LogisticRegression`` as ``threshold``.
            family: Passed to ``LogisticRegression`` as ``family``
                ("auto", "multinomial" or "binomial").

        Returns:
            The fitted model returned by ``LogisticRegression.fit``.
        """
        estimator = LogisticRegression(
            featuresCol=self.featuresColm,
            labelCol=self.labelColm,
            maxIter=maxIter,
            regParam=regParam,
            elasticNetParam=elasticNetParam,
            threshold=threshold,
            family=family,
        )
        # Return the fitted model so callers can use it (e.g. pass it to
        # classificationModelStat) instead of it being discarded.
        return estimator.fit(self.trainData)

    def randomForestClassifierModel(self, numTrees=10):
        """Fit a random forest classifier on ``self.trainData``.

        Args:
            numTrees: Passed to ``RandomForestClassifier`` as ``numTrees``;
                the default is the value this method previously hard-coded.

        Returns:
            The fitted model returned by ``RandomForestClassifier.fit``.
        """
        estimator = RandomForestClassifier(
            labelCol=self.labelColm,
            featuresCol=self.featuresColm,
            numTrees=numTrees,
        )
        # Return the fitted model rather than dropping it.
        return estimator.fit(self.trainData)