In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
from sklearn.cross_validation import train_test_split
import random



### Iris Dataset

In [2]:

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']
def load_data(label_name='Species'):
    """Parses the csv file in TRAIN_URL and TEST_URL."""

    # Create a local copy of the training set.
    train_path = tf.keras.utils.get_file(fname=TRAIN_URL.split('/')[-1],
                                         origin=TRAIN_URL)
    # train_path now holds the pathname: ~/.keras/datasets/iris_training.csv

    # Parse the local CSV file.
    train = pd.read_csv(filepath_or_buffer=train_path,
                        names=CSV_COLUMN_NAMES,  # list of column names
                        header=0  # ignore the first row of the CSV file.
                       )
    # train now holds a pandas DataFrame, which is data structure
    # analogous to a table.

    # 1. Assign the DataFrame's labels (the right-most column) to train_label.
    # 2. Delete (pop) the labels from the DataFrame.
    # 3. Assign the remainder of the DataFrame to train_features
    train_features, train_label = train, train.pop(label_name)

    # Apply the preceding logic to the test set.
    test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
    test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
    test_features, test_label = test, test.pop(label_name)

    # Return four DataFrames.
    return (train_features, train_label), (test_features, test_label)
(x_train, y_train), (x_test, y_test) = load_data()

In [3]:
x_train = np.array(x_train)
x_test = np.array(x_test)

### Weather Report Dataset

In [4]:
from sklearn.model_selection import train_test_split
train_df = pd.read_csv('datatraining.txt')
train = train_df.drop(['date'],axis=1)
label = train_df['Occupancy']
train = train.drop(['Occupancy'],axis=1)
x_train, x_test,y_train,y_test = train_test_split(train, label ,test_size=0.2)

x_train = np.array(x_train)
x_test = np.array(x_test)

### Logistic Regression
model building from scratch

In [5]:
def sigmoid(z):
        return 1.0/(1+np.exp(-z))

class LogisticRegression():
    """
        Parameters:
        -----------
        n_iterations: int
            梯度下降的轮数
        learning_rate: float
            梯度下降学习率
    """
    def __init__(self,learning_rate, n_iters):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        
    def weight_initialize(self, n_features):
        limit = np.sqrt(1/n_features)
        w = np.random.uniform(-limit,limit, (n_features,1))
        b = 0
        self.w = np.insert(w, 0, b, axis=0)      
        
    def fit(self, X, y):
        print('------------------------------------')
        print('Start training...')
        print('Leanring Rate: {}  Iterations: {}'.format(self.learning_rate,self.n_iters))
        print('------------------------------------')
        m_samples, n_features = X.shape
        self.weight_initialize(n_features)
        X = np.insert(X, 0, 1, axis=1)
        y = np.reshape(y, (m_samples, 1))
    
        for i in range(self.n_iters):
            h_x = X.dot(self.w)
            y_pred = sigmoid(h_x)
            w_grad = X.T.dot(y_pred - y)
            loss = np.sum(np.square(y_pred-y))/len(y)
            print('In iter{} loss: {}'.format(i,round(loss,4)))
            self.w = self.w - self.learning_rate * w_grad
            
    def predict(self, X):
        print('------------------------------------')
        print('Start predicting...')
        X = np.insert(X, 0, 1, axis=1)
        h_x = X.dot(self.w)
        y_pred = np.round(sigmoid(h_x))
        return y_pred.astype(int)

### Model testing

In [6]:
clf = LogisticRegression(0.001,20)
clf.fit(x_train,y_train)
y_pred = clf.predict(x_test)

  return getattr(obj, method)(*args, **kwds)


------------------------------------
Start training...
Leanring Rate: 0.001  Iterations: 20
------------------------------------
In iter0 loss: 0.7855
In iter1 loss: 0.214
In iter2 loss: 0.2143
In iter3 loss: 0.786
In iter4 loss: 0.2143
In iter5 loss: 0.1075
In iter6 loss: 0.0807
In iter7 loss: 0.0846
In iter8 loss: 0.082
In iter9 loss: 0.0849
In iter10 loss: 0.0811
In iter11 loss: 0.085
In iter12 loss: 0.0718
In iter13 loss: 0.1007
In iter14 loss: 0.1076
In iter15 loss: 0.1064
In iter16 loss: 0.1874
In iter17 loss: 0.2099
In iter18 loss: 0.786
In iter19 loss: 0.2145
------------------------------------
Start predicting...


  


In [7]:
from sklearn.metrics import accuracy_score
accuracy_score(y_pred,y_test)

0.9343155310006138