Link - Notebook de trabajo
===

[Link - Notebook de trabajo](https://drive.google.com/drive/folders/19McT1MaZ_byTXtK6ENe2Qfb25oQHGDQB?usp=sharing)

Alumno: Jose Ysique u201616533



Dataset
===



## Contexto
Este conjunto de datos es originalmente del Instituto Nacional de Diabetes y Enfermedades Digestivas y Renales. El objetivo del conjunto de datos es predecir de forma diagnóstica si un paciente tiene diabetes o no, basándose en determinadas medidas de diagnóstico incluidas en el conjunto de datos. Se impusieron varias restricciones a la selección de estas instancias de una base de datos más grande. En particular, todos los pacientes aquí son mujeres de al menos 21 años de edad de origen indio Pima.

## Contenido
El conjunto de datos consta de varias variables predictoras médicas y una variable objetivo, Resultado. Las variables predictoras incluyen el número de embarazos que ha tenido la paciente, su IMC, nivel de insulina, edad, etc.

[Link - Kaggle - Pima Indians Diabetes Database](https://www.kaggle.com/uciml/pima-indians-diabetes-database).


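[Datasets - Github](https://github.com/jysique/datasets)

Nota: el código de este cuaderno descarga el archivo `wbcd-data.csv` de este repositorio (ver la función `main`).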

Neural Network
===

Las redes neuronales son un modelo computacional inspirado en el comportamiento observado en su homólogo biológico. Consisten en un conjunto de unidades, llamadas neuronas artificiales, conectadas entre sí para transmitirse señales. La información de entrada atraviesa la red neuronal produciendo unos valores de salida.

Cada neurona está conectada con otras a través de unos enlaces. En estos enlaces el valor de salida de la neurona anterior es multiplicado por un valor de peso. Estos pesos en los enlaces pueden incrementar o inhibir el estado de activación de las neuronas adyacentes. Del mismo modo, a la salida de la neurona, puede existir una función limitadora o umbral, que modifica el valor resultante o impone un límite que no se debe sobrepasar antes de propagarse a otra neurona. Esta función se conoce como función de activación.

Estos sistemas aprenden y se forman a sí mismos, en lugar de ser programados de forma explícita, y sobresalen en áreas donde la detección de soluciones o características es difícil de expresar con la programación convencional. Para realizar este aprendizaje automático, normalmente, se intenta minimizar una función de pérdida que evalúa la red en su totalidad. Los valores de los pesos de las neuronas se van actualizando buscando reducir el valor de la función de pérdida. Este proceso se realiza mediante la propagación hacia atrás.

In [None]:
!apt install golang-go spin 1> /dev/null && go version && spin -V



go version go1.10.4 linux/amd64
Spin Version 6.4.6 -- 2 December 2016


In [None]:
%%writefile nn.go
package main

import (
	"math"
	"math/rand"
	"fmt"
	"log"
	"time"
	"strconv"
	"encoding/csv"
	"net/http"
)

// Package-level state filled in by main while the pipeline runs.
var (
	// columnsNamesNN holds the CSV header names in file order.
	columnsNamesNN []string
	// columnsNN maps each header name to its column index.
	columnsNN map[string]int
	// dataframeNN is the dataset, one row of float64 values per CSV record.
	dataframeNN [][]float64

	// dataTest and dataTrain are the two row partitions of dataframeNN.
	dataTest  [][]float64
	dataTrain [][]float64

	// xDataTest and xDataTrain keep only the feature columns of each partition.
	xDataTest  [][]float64
	xDataTrain [][]float64

	// yDataTest and yDataTrain keep only the target column of each partition.
	yDataTest  [][]float64
	yDataTrain [][]float64

	// predictedResults is the slot for the network scores on the test partition.
	predictedResults [][]float64
	// confusionMatrix stores the matrix built from the test-set predictions.
	confusionMatrix [][]int
)

// readArchiveCSV downloads the CSV document at url and converts it into a
// numeric table.
//
// The first record is taken as the header row. It returns the header names,
// a map from header name to column index, and the remaining records parsed
// as float64 values (one slice per record, one entry per header).
//
// Every failure terminates the program through the log package, matching the
// function's existing error style: a failed or timed-out request, a non-200
// response, malformed CSV, an empty document, or a cell that is not a valid
// number. Previously unparsable cells were silently stored as 0.
func readArchiveCSV(url string) ([]string, map[string]int, [][]float64) {
	// A bounded timeout keeps a stalled server from hanging the program.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		log.Fatal("No se puede leer el archivo de entrada ", err)
	}
	defer resp.Body.Close()

	// An error page (404, 500, ...) must not be parsed as if it were data.
	if resp.StatusCode != http.StatusOK {
		log.Fatal("No se puede leer el archivo de entrada ", resp.Status)
	}

	fileData, err := csv.NewReader(resp.Body).ReadAll()
	if err != nil {
		log.Fatal("No se puede parsear el archivo de entrada ", err)
	}
	if len(fileData) == 0 {
		log.Fatal("No se puede parsear el archivo de entrada ", "archivo vacío")
	}

	headers := make([]string, len(fileData[0]))
	copy(headers, fileData[0])

	columns := make(map[string]int, len(headers))
	for i, header := range headers {
		columns[header] = i
	}

	records := fileData[1:]
	fileDataReal := make([][]float64, len(records))
	for i, record := range records {
		// csv.Reader rejects records whose field count differs from the
		// first record, so indexing by header position is safe here.
		fileDataReal[i] = make([]float64, len(headers))
		for j := range headers {
			val, err := strconv.ParseFloat(record[j], 64)
			if err != nil {
				log.Fatalf("No se puede parsear el valor %q (fila %d, columna %q): %v", record[j], i+1, headers[j], err)
			}
			fileDataReal[i][j] = val
		}
	}

	return headers, columns, fileDataReal
}

// splitPercent randomly partitions the rows of fileData into two groups.
//
// For each row a uniform number in [0, 1) is drawn. The row goes to the first
// returned slice when percentSplit is smaller than the draw, and to the second
// one otherwise, so the second slice receives roughly percentSplit of the rows.
// Row order is preserved inside each group and the rows themselves are shared
// with fileData, not copied.
//
// The generator is created and seeded once per call. Re-seeding from the clock
// on every row is wasteful and can repeat draws when two iterations observe
// the same timestamp.
func splitPercent(fileData [][]float64, percentSplit float64) ([][]float64, [][]float64) {
	first := make([][]float64, 0)
	second := make([][]float64, 0)

	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	for _, row := range fileData {
		if percentSplit < rng.Float64() {
			first = append(first, row)
		} else {
			second = append(second, row)
		}
	}
	return first, second
}


// splitColumns projects fileData onto the columns named in newheaders.
//
// columns gives the position of every header in fileData. The result holds
// newheaders itself, a fresh name-to-index map for the projected layout, and a
// new table whose columns follow the order of newheaders. A name that is
// missing from columns resolves to index 0, the map's zero value. The headers
// argument is not consulted.
func splitColumns(headers []string, columns map[string]int, fileData [][]float64, newheaders []string) ([]string, map[string]int, [][]float64) {
	sourceIdx := make([]int, 0, len(newheaders))
	projected := make(map[string]int)
	for pos, name := range newheaders {
		sourceIdx = append(sourceIdx, columns[name])
		projected[name] = pos
	}

	rows := make([][]float64, 0, len(fileData))
	for _, src := range fileData {
		dst := make([]float64, len(sourceIdx))
		for k, col := range sourceIdx {
			dst[k] = src[col]
		}
		rows = append(rows, dst)
	}

	return newheaders, projected, rows
}

// GetMinMax returns the smallest and largest value of array, in that order.
//
// Both bounds start from the first element rather than from 0, so an
// all-positive slice reports its real minimum and an all-negative slice its
// real maximum. An empty slice yields (0, 0).
func GetMinMax(array []float64) (float64, float64) {
	if len(array) == 0 {
		return 0, 0
	}
	lowest, highest := array[0], array[0]
	for _, value := range array[1:] {
		if value < lowest {
			lowest = value
		}
		if value > highest {
			highest = value
		}
	}
	return lowest, highest
}

// GetCol copies column colID of arr into a new slice, one entry per row and in
// row order. The result is empty but non-nil when arr has no rows.
func GetCol(arr [][]float64, colID int) []float64 {
	column := make([]float64, 0, len(arr))
	for r := 0; r < len(arr); r++ {
		column = append(column, arr[r][colID])
	}
	return column
}


// normalizeData rescales every column of fileData to the [0, 1] range with
// min-max scaling: (value - min) / (max - min), where min and max are the
// column's own extremes.
//
// It returns a new table with the same shape and leaves fileData untouched.
// An empty table is returned as an empty table. A column whose values are all
// equal has no range to scale by and is emitted as zeros instead of NaN.
// The column count is taken from the first row; every row is assumed to have
// at least that many entries.
func normalizeData(fileData [][]float64) [][]float64 {
	scaled := make([][]float64, len(fileData))
	for i, row := range fileData {
		scaled[i] = make([]float64, len(row))
	}
	if len(fileData) == 0 {
		return scaled
	}

	for col := 0; col < len(fileData[0]); col++ {
		// Seed the bounds with a real sample so that columns that never
		// reach 0 still get their true minimum and maximum.
		lowest, highest := fileData[0][col], fileData[0][col]
		for _, row := range fileData[1:] {
			if row[col] < lowest {
				lowest = row[col]
			}
			if row[col] > highest {
				highest = row[col]
			}
		}

		span := highest - lowest
		if span == 0 {
			continue // constant column: keep the zeros from make
		}
		for r, row := range fileData {
			scaled[r][col] = (row[col] - lowest) / span
		}
	}

	return scaled
}



// NeuralNetwork is a three-layer network (input, hidden, output) in which every
// neuron runs in its own goroutines and exchanges values over channels.
type NeuralNetwork struct{
	// Neurons of each layer. NewNetwork adds one extra input neuron that
	// Forward feeds with the constant 1.0 (bias).
	mHiddenLayer []*Neural
	mInputLayer  []*Neural
	mOutputLayer []*Neural
	// mWeightHidden[i][h] weights the link from input neuron i to hidden neuron h.
	mWeightHidden [][]float64
	// mWeightOutput[h][o] weights the link from hidden neuron h to output neuron o.
	mWeightOutput [][]float64
	// Gradient term ("change") applied to each weight in the previous update.
	mLastChangeHidden [][]float64
	mLastChangeOutput [][]float64
	// Latest value written by every output neuron during a forward pass.
	mOutput []float64
	// mForwardDone receives one signal per output neuron when a forward pass ends.
	mForwardDone chan bool
	// mFeedbackDone receives one signal per hidden neuron when its weights are updated.
	mFeedbackDone chan bool
	// When true the output neurons skip the sigmoid activation.
	mRegression bool
	mRate1 float64 // learning rate: multiplies the current change in a weight update
	// mRate2 multiplies the previous change of the same weight in a weight update.
	mRate2 float64
}


// sigmoid evaluates the logistic function 1 / (1 + e^-x).
func sigmoid(x float64) float64 {
	decay := math.Exp(-x)
	return 1 / (1 + decay)
}

// dsigmoid returns the derivative of the logistic function expressed through
// its output y, that is y * (1 - y).
func dsigmoid(y float64) float64 {
	complement := 1 - y
	return y * complement
}

// makeMatrix builds a rows x colums matrix with every cell set to value.
func makeMatrix(rows, colums int, value float64) [][]float64 {
	grid := make([][]float64, 0, rows)
	for len(grid) < rows {
		line := make([]float64, colums)
		for k := range line {
			line[k] = value
		}
		grid = append(grid, line)
	}
	return grid
}

// randomMatrix builds a rows x colums matrix whose cells are drawn from the
// package-level math/rand generator and mapped onto [lower, upper). Cells are
// filled row by row, left to right.
func randomMatrix(rows, colums int, lower, upper float64) [][]float64 {
	width := upper - lower
	grid := make([][]float64, 0, rows)
	for len(grid) < rows {
		line := make([]float64, colums)
		for k := range line {
			line[k] = rand.Float64()*width + lower
		}
		grid = append(grid, line)
	}
	return grid
}

// DefaultNetwork builds a network through NewNetwork using the stock rates:
// 0.01 for the first rate and 0.001 for the second.
func DefaultNetwork(iInputCount, iHiddenCount, iOutputCount int, iRegression bool) *NeuralNetwork {
	const (
		defaultRate1 = 0.01
		defaultRate2 = 0.001
	)
	return NewNetwork(iInputCount, iHiddenCount, iOutputCount, iRegression, defaultRate1, defaultRate2)
}

// NewNetwork allocates a network with iInputCount inputs plus one bias input,
// iHiddenCount hidden neurons and iOutputCount output neurons.
//
// iRegression is stored in the network and later handed to every neuron by
// Start. iRate1 and iRate2 are the two factors used in the weight updates.
// Input-to-hidden weights are drawn from [-0.2, 0.2), hidden-to-output weights
// from [-2.0, 2.0), and the last-change matrices start at zero. The neurons
// are created but not running; call Start before using the network.
func NewNetwork(iInputCount, iHiddenCount, iOutputCount int, iRegression bool, iRate1, iRate2 float64) *NeuralNetwork {
	inputsWithBias := iInputCount + 1

	nn := &NeuralNetwork{
		mRegression:   iRegression,
		mOutput:       make([]float64, iOutputCount),
		mForwardDone:  make(chan bool),
		mFeedbackDone: make(chan bool),
		mRate1:        iRate1,
		mRate2:        iRate2,
	}

	// buildLayer creates size neurons of the given layer, each expecting
	// fanIn incoming values per forward pass.
	buildLayer := func(layer, size, fanIn int) []*Neural {
		neurons := make([]*Neural, size)
		for k := range neurons {
			neurons[k] = NewNeural(nn, layer, k, fanIn)
		}
		return neurons
	}
	nn.mInputLayer = buildLayer(0, inputsWithBias, 1)
	nn.mHiddenLayer = buildLayer(1, iHiddenCount, inputsWithBias)
	nn.mOutputLayer = buildLayer(2, iOutputCount, iHiddenCount)

	// Keep this order: both matrices draw from the shared random generator.
	nn.mWeightHidden = randomMatrix(inputsWithBias, iHiddenCount, -0.2, 0.2)
	nn.mWeightOutput = randomMatrix(iHiddenCount, iOutputCount, -2.0, 2.0)

	nn.mLastChangeHidden = makeMatrix(inputsWithBias, iHiddenCount, 0.0)
	nn.mLastChangeOutput = makeMatrix(iHiddenCount, iOutputCount, 0.0)

	return nn
}

// Start launches every neuron of the network, layer by layer (input, hidden,
// output), passing each one the network's regression flag.
func (self *NeuralNetwork) Start() {
	layers := [][]*Neural{self.mInputLayer, self.mHiddenLayer, self.mOutputLayer}
	for _, layer := range layers {
		for _, neuron := range layer {
			neuron.start(self.mRegression)
		}
	}
}

// Stop shuts the network down by closing the input and feedback channel of
// every neuron (input, hidden, then output layer) and finally the two
// completion channels of the network itself.
func (self *NeuralNetwork) Stop() {
	layers := [][]*Neural{self.mInputLayer, self.mHiddenLayer, self.mOutputLayer}
	for _, layer := range layers {
		for _, neuron := range layer {
			close(neuron.mInputChan)
			close(neuron.mFeedbackChan)
		}
	}
	close(self.mForwardDone)
	close(self.mFeedbackDone)
}


// Forward pushes one sample through the network and returns the output values.
//
// input must hold exactly one value per non-bias input neuron; otherwise the
// method panics. The values are sent from a separate goroutine, followed by the
// constant 1.0 for the bias neuron, while the caller waits for one completion
// signal per output neuron. The returned slice shares storage with the
// network's output buffer.
func (self *NeuralNetwork) Forward(input []float64) (output []float64) {
	biasIdx := len(self.mInputLayer) - 1
	if len(input) != biasIdx {
		panic("amount of input variable doesn't match")
	}

	go func() {
		for k := 0; k < biasIdx; k++ {
			self.mInputLayer[k].mInputChan <- input[k]
		}
		self.mInputLayer[biasIdx].mInputChan <- 1.0 // bias node
	}()

	for pending := len(self.mOutput); pending > 0; pending-- {
		<-self.mForwardDone
	}
	return self.mOutput[:]
}

// Feedback runs one back-propagation step for the sample most recently passed
// to Forward. Each output neuron receives its entry of target from a separate
// goroutine, and the call returns after every hidden neuron has signalled
// completion.
func (self *NeuralNetwork) Feedback(target []float64) {
	go func() {
		for k := range self.mOutput {
			self.mOutputLayer[k].mFeedbackChan <- target[k]
		}
	}()

	for pending := len(self.mHiddenLayer); pending > 0; pending-- {
		<-self.mFeedbackDone
	}
}

// CalcError returns half the sum of squared differences between the network's
// current output buffer and target.
func (self *NeuralNetwork) CalcError(target []float64) float64 {
	total := 0.0
	for k, produced := range self.mOutput {
		diff := produced - target[k]
		total += 0.5 * diff * diff
	}
	return total
}

// genRandomIdx returns the integers 0..N-1 in random order. It walks the slice
// once and swaps each position with a randomly chosen position at or after it,
// using the package-level math/rand generator.
func genRandomIdx(N int) []int {
	order := make([]int, N)
	for k := range order {
		order[k] = k
	}

	for k := range order {
		remaining := float64(N - k)
		pick := k + int(rand.Float64()*remaining)
		order[k], order[pick] = order[pick], order[k]
	}
	return order
}

// Train fits the network to the given samples.
//
// inputs[k] is paired with targets[k]. Every iteration visits all samples once
// in a fresh random order, running Forward and Feedback on each. On every
// 100th iteration (starting with the first) the error of the last visited
// sample is printed, and training stops early once two consecutive reported
// errors each improved by less than 0.001.
//
// The reported error compares the network output with the target of the
// sample that produced it. Using the final row of targets instead would pair
// the output of one sample with the label of another, because the visiting
// order is shuffled.
//
// Train panics when the training set is empty, when there are fewer targets
// than inputs, or when the sample widths do not match the network layout.
func (self *NeuralNetwork) Train(inputs [][]float64, targets [][]float64, iteration int) {
	if len(inputs) == 0 || len(targets) == 0 {
		panic("training set is empty")
	}
	if len(targets) < len(inputs) {
		panic("amount of targets doesn't match amount of inputs")
	}
	if len(inputs[0])+1 != len(self.mInputLayer) {
		panic("amount of input variable doesn't match")
	}
	if len(targets[0]) != len(self.mOutputLayer) {
		panic("amount of output variable doesn't match")
	}

	// Seed values chosen so the early-stop test cannot fire on the first report.
	oldErr1 := 1.0
	oldErr2 := 2.0

	for i := 0; i < iteration; i++ {
		order := genRandomIdx(len(inputs))
		for _, idx := range order {
			self.Forward(inputs[idx])
			self.Feedback(targets[idx])
		}
		if i%100 == 0 {
			// The output buffer still holds the result for the last
			// visited sample, so score it against that sample's target.
			lastTarget := targets[order[len(order)-1]]
			curErr := self.CalcError(lastTarget)
			fmt.Println("err: ", curErr)
			if (oldErr2-oldErr1 < 0.001) && (oldErr1-curErr < 0.001) { // early stop
				break
			}
			oldErr2 = oldErr1
			oldErr1 = curErr
		}
	}
}

// ActiveFunction turns scores into class labels: 1 for every value strictly
// above 0.5 and 0 otherwise. The result is nil when a is empty.
func (self *NeuralNetwork) ActiveFunction(a []float64) []float64 {
	if len(a) == 0 {
		return nil
	}
	labels := make([]float64, len(a))
	for k, score := range a {
		if score > 0.5 {
			labels[k] = 1
		}
	}
	return labels
}


// Test runs every sample of patternsX through the network and returns one
// slice of scores per sample.
//
// When print is true a line is written for each output value showing the
// input, the score, the class derived with ActiveFunction and the expected
// row of patternsY. patternsY is only read for that report.
//
// For a classification network (mRegression false) the output neurons already
// apply the sigmoid, so the scores are used as produced. Squashing them a
// second time confines every score to the interval (0.5, 0.732), which makes
// the 0.5 threshold of ActiveFunction always pick class 1.
// NOTE(review): for regression networks the sigmoid is still applied here, as
// before; confirm whether raw outputs are wanted in that mode.
func (self *NeuralNetwork) Test(patternsX [][]float64, patternsY [][]float64, print bool) [][]float64 {
	predictedArray := make([][]float64, 0, len(patternsX))
	for i := range patternsX {
		output := self.Forward(patternsX[i])
		// Copy the values out: Forward returns the network's own buffer,
		// which the next sample overwrites.
		scores := make([]float64, 0, len(output))
		for j := range output {
			calc := output[j]
			if self.mRegression {
				calc = sigmoid(calc)
			}
			scores = append(scores, calc)
			if print {
				fmt.Println(patternsX[i], "->\t\t", calc, "->\tClase Predicha", self.ActiveFunction(scores), ":", patternsY[i])
			}
		}
		predictedArray = append(predictedArray, scores)
	}
	return predictedArray
}

// Neural is a single neuron of a NeuralNetwork. Its work is done by the
// goroutines launched in start.
type Neural struct{
	// mInputChan delivers the values summed during a forward pass.
	mInputChan chan float64
	// mFeedbackChan delivers the target (output layer) or the error terms sent
	// by the output neurons (hidden layer) during back-propagation.
	mFeedbackChan chan float64
	// Number of values read from mInputChan per forward pass.
	mInputCount int
	// Layer the neuron belongs to: 0 input, 1 hidden, any other value output.
	mLayer int
	// Position of the neuron inside its layer.
	mNo int
	// Network that owns the neuron and holds the shared weights.
	mNetwork * NeuralNetwork
	// Value computed by the neuron in its latest forward pass.
	mValue float64
}

// NewNeural creates neuron number iNo of layer iLayer for iNetwork. The neuron
// expects iInputCount incoming values per forward pass, gets its own
// unbuffered input and feedback channels, and starts with a value of 0.
func NewNeural(iNetwork *NeuralNetwork, iLayer, iNo, iInputCount int) *Neural {
	return &Neural{
		mInputChan:    make(chan float64),
		mFeedbackChan: make(chan float64),
		mInputCount:   iInputCount,
		mLayer:        iLayer,
		mNo:           iNo,
		mNetwork:      iNetwork,
		mValue:        0.0,
	}
}


// start launches the two goroutines that make up a running neuron.
//
// Forward goroutine: repeatedly sums mInputCount values from mInputChan and
// then acts according to the layer. An input neuron forwards the sum, times
// the matching hidden weight, to every hidden neuron. A hidden neuron applies
// the sigmoid and forwards the result, times the matching output weight, to
// every output neuron. An output neuron applies the sigmoid unless regression
// is true, stores the result in the network's output buffer and signals
// mForwardDone.
//
// Feedback goroutine: input neurons have nothing to learn and exit at once.
// A hidden neuron collects one error term per output neuron, updates its
// incoming weights and signals mFeedbackDone. An output neuron receives its
// target, updates its incoming weights and sends one error term to every
// hidden neuron.
//
// Ordering matters for correctness under the Go memory model:
//   - mValue is stored before the neuron signals anything downstream, so the
//     feedback pass reads the value of the current sample and not a stale one.
//   - An output neuron computes its error terms from the pre-update weights,
//     then updates the weights, and only then sends the terms. All output
//     weight updates are therefore finished before the hidden neurons signal
//     mFeedbackDone, that is, before the next forward pass can read them.
//
// Both goroutines end when their channel is closed by NeuralNetwork.Stop. The
// deferred recover additionally absorbs the panic raised when a neuron is
// caught sending on a channel that Stop has already closed.
func (self *Neural) start(regression bool) {
	network := self.mNetwork

	go func() { // forward loop
		defer func() { recover() }()
		for {
			sum := 0.0
			for i := 0; i < self.mInputCount; i++ {
				value, open := <-self.mInputChan
				if !open {
					return // network stopped
				}
				sum += value
			}
			switch self.mLayer {
			case 0: // input layer
				self.mValue = sum
				for i := 0; i < len(network.mHiddenLayer); i++ {
					network.mHiddenLayer[i].mInputChan <- sum * network.mWeightHidden[self.mNo][i]
				}
			case 1: // hidden layer
				sum = sigmoid(sum)
				self.mValue = sum
				for i := 0; i < len(network.mOutputLayer); i++ {
					network.mOutputLayer[i].mInputChan <- sum * network.mWeightOutput[self.mNo][i]
				}
			default: // output layer
				if !regression {
					sum = sigmoid(sum)
				}
				self.mValue = sum
				network.mOutput[self.mNo] = sum
				network.mForwardDone <- true
			}
		}
	}()

	go func() { // feedback loop
		defer func() { recover() }()
		switch self.mLayer {
		case 0: // input layer
			return
		case 1: // hidden layer
			for {
				err := 0.0
				for i := 0; i < len(network.mOutput); i++ {
					term, open := <-self.mFeedbackChan
					if !open {
						return // network stopped
					}
					err += term
				}
				for i := 0; i < self.mInputCount; i++ {
					change := err * dsigmoid(self.mValue) * network.mInputLayer[i].mValue
					network.mWeightHidden[i][self.mNo] -= (network.mRate1*change + network.mRate2*network.mLastChangeHidden[i][self.mNo])
					network.mLastChangeHidden[i][self.mNo] = change
				}
				network.mFeedbackDone <- true
			}
		default: // output layer
			// Scratch buffer for the error terms, reused for every sample.
			terms := make([]float64, self.mInputCount)
			for {
				target, open := <-self.mFeedbackChan
				if !open {
					return // network stopped
				}
				err := self.mValue - target

				// Error terms use the weights that produced this output.
				for i := range terms {
					terms[i] = err * network.mWeightOutput[i][self.mNo]
				}

				// With a sigmoid output the gradient carries its derivative.
				gradient := err
				if !regression {
					gradient = err * dsigmoid(self.mValue)
				}
				for i := 0; i < self.mInputCount; i++ {
					change := gradient * network.mHiddenLayer[i].mValue
					network.mWeightOutput[i][self.mNo] -= (network.mRate1*change + network.mRate2*network.mLastChangeOutput[i][self.mNo])
					network.mLastChangeOutput[i][self.mNo] = change
				}

				for i, term := range terms {
					network.mHiddenLayer[i].mFeedbackChan <- term
				}
			}
		}
	}()
}


// GetConfusionMatrix counts how often each real class was predicted as each
// class. The result is indexed as cm[real][predicted].
//
// Values are mapped to class indices by rounding to the nearest integer, with
// exact halves rounding down so that a score of 0.5 belongs to class 0, the
// same threshold ActiveFunction uses. Plain int() truncation would put every
// probability below 1.0 into class 0. Negative values count as class 0, and
// pairs containing NaN or an infinity are skipped.
//
// The matrix is square and sized to hold the largest class index seen in
// either argument, never smaller than 2x2 so that the binary metrics in this
// file can always index it. Entries of real without a counterpart in predicted
// are ignored.
func GetConfusionMatrix(predicted [][]float64, real [][]float64) [][]int {
	// toClass returns the class index for v, or -1 when v is not a usable number.
	toClass := func(v float64) int {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			return -1
		}
		class := int(math.Ceil(v - 0.5))
		if class < 0 {
			class = 0
		}
		return class
	}

	type outcome struct{ actual, guess int }
	outcomes := make([]outcome, 0, len(real))
	classes := 2
	for i := range real {
		if i >= len(predicted) {
			break
		}
		for j := range real[i] {
			if j >= len(predicted[i]) {
				break
			}
			actual, guess := toClass(real[i][j]), toClass(predicted[i][j])
			if actual < 0 || guess < 0 {
				continue
			}
			if actual >= classes {
				classes = actual + 1
			}
			if guess >= classes {
				classes = guess + 1
			}
			outcomes = append(outcomes, outcome{actual, guess})
		}
	}

	cm := make([][]int, classes)
	for i := range cm {
		cm[i] = make([]int, classes)
	}
	for _, o := range outcomes {
		cm[o.actual][o.guess]++
	}
	return cm
}


// PrintConfusionMatrix writes matrix to standard output under a
// "Confusion Matrix" title. Columns and rows are listed from the highest index
// down to 0, each labelled with its index. The column count is taken from the
// first row, so matrix must not be empty.
func PrintConfusionMatrix(matrix [][]int) {
	fmt.Println("Confusion Matrix")
	fmt.Print("   ")

	lastCol := len(matrix[0]) - 1
	for col := lastCol; col >= 0; col-- {
		fmt.Printf("|%d|", col)
	}
	fmt.Println()

	for row := len(matrix) - 1; row >= 0; row-- {
		fmt.Printf("|%d|", row)
		for col := lastCol; col >= 0; col-- {
			fmt.Printf("|%d", matrix[row][col])
		}
		fmt.Println("|")
	}
}

// Recall returns TP / (TP + FN) for class 1 of a binary confusion matrix
// indexed as matrix[real][predicted]: of all samples that really are class 1,
// the fraction predicted as class 1.
//
// With that layout the false negatives are matrix[1][0] (real 1, predicted 0).
// Dividing by matrix[0][1] would use the false positives and yield precision.
// Returns 0 when there are no real class-1 samples, avoiding a NaN.
func Recall(matrix [][]int) float64 {
	truePositives := matrix[1][1]
	falseNegatives := matrix[1][0]
	if truePositives+falseNegatives == 0 {
		return 0
	}
	return float64(truePositives) / float64(truePositives+falseNegatives)
}

// Precision returns TP / (TP + FP) for class 1 of a binary confusion matrix
// indexed as matrix[real][predicted]: of all samples predicted as class 1, the
// fraction that really are class 1.
//
// With that layout the false positives are matrix[0][1] (real 0, predicted 1).
// Dividing by matrix[1][0] would use the false negatives and yield recall.
// Returns 0 when nothing was predicted as class 1, avoiding a NaN.
func Precision(matrix [][]int) float64 {
	truePositives := matrix[1][1]
	falsePositives := matrix[0][1]
	if truePositives+falsePositives == 0 {
		return 0
	}
	return float64(truePositives) / float64(truePositives+falsePositives)
}
// Accuracy returns the fraction of samples on the diagonal of a confusion
// matrix, i.e. those whose predicted class equals the real class. It accepts
// a square matrix of any size and returns 0 for a matrix without samples
// instead of NaN.
func Accuracy(matrix [][]int) float64 {
	correct, total := 0, 0
	for i, row := range matrix {
		for j, count := range row {
			total += count
			if i == j {
				correct += count
			}
		}
	}
	if total == 0 {
		return 0
	}
	return float64(correct) / float64(total)
}
// Metrics prints recall, precision and accuracy of matrix as percentages with
// two decimals, under a "Metricas" heading. No newline follows the last value.
func Metrics(matrix [][]int) {
	fmt.Println("Metricas")
	fmt.Printf("\tRecall: %.2f \n", Recall(matrix)*100)
	fmt.Printf("\tPrecision: %.2f\n", Precision(matrix)*100)
	fmt.Printf("\tAccuracy: %.2f", Accuracy(matrix)*100)
}

// main runs the whole experiment: download and normalize the dataset, split
// it at random into test and train partitions, separate features from the
// target (the last column), train a network with one output, and report the
// test predictions, the confusion matrix and the derived metrics.
func main() {
	split := 0.8
	columnsNamesNN, columnsNN, dataframeNN = readArchiveCSV("https://raw.githubusercontent.com/jysique/datasets/master/wbcd-data.csv")
	if len(columnsNamesNN) < 2 {
		log.Fatal("El archivo debe tener al menos una columna de entrada y una columna objetivo")
	}
	dataframeNN = normalizeData(dataframeNN)

	// splitPercent sends roughly `split` of the rows to its second result.
	dataTest, dataTrain = splitPercent(dataframeNN, split)
	if len(dataTrain) == 0 || len(dataTest) == 0 {
		log.Fatal("La partición aleatoria dejó vacío el conjunto de entrenamiento o de prueba")
	}

	lastCol := len(columnsNamesNN) - 1
	featureNames := columnsNamesNN[:lastCol]
	targetName := []string{columnsNamesNN[lastCol]}

	_, _, xDataTrain = splitColumns(columnsNamesNN, columnsNN, dataTrain, featureNames)
	_, _, yDataTrain = splitColumns(columnsNamesNN, columnsNN, dataTrain, targetName)
	_, _, xDataTest = splitColumns(columnsNamesNN, columnsNN, dataTest, featureNames)
	_, _, yDataTest = splitColumns(columnsNamesNN, columnsNN, dataTest, targetName)

	inputs := len(xDataTrain[0])
	// Half as many hidden neurons as inputs, but never an empty hidden layer.
	hidden := inputs / 2
	if hidden < 1 {
		hidden = 1
	}

	nn := DefaultNetwork(inputs, hidden, 1, false)
	nn.Start()
	nn.Train(xDataTrain, yDataTrain, 1000)

	// Plain assignment: ":=" would declare a local that hides the
	// package-level predictedResults and leave it unset.
	predictedResults = nn.Test(xDataTest, yDataTest, true)

	confusionMatrix = GetConfusionMatrix(predictedResults, yDataTest)
	PrintConfusionMatrix(confusionMatrix)
	Metrics(confusionMatrix)
	nn.Stop()
}

Overwriting nn.go


In [None]:
!go run nn.go

err:  0.07595721455755762
err:  0.4967369903241563
err:  0.0011323927113966565
err:  6.905955724761228e-06
err:  3.977379441963024e-07
err:  3.796497922860142e-05
[0.7218071860547848 0.365071283095723 0.716710875331565 0.51859256297481 0.6138310893512853 0.3844817602779386 0.4639175257731959 0.5183896620278331 0.5950657894736843 0.603756157635468 0.2635572572224156 0.15993858751279427 0.24740673339399452 0.17417926964219843 0.3690973337616447 0.18175775480059086 0.14363636363636362 0.3570752036370525 0.22241925269157692 0.1714142091152815 0.6254162042175361 0.3364957610012112 0.6058917197452229 0.3702397743300423 0.6172506738544474 0.1937618147448015 0.31948881789137384 0.5584192439862542 0.35613136486893643 0.3700240963855422] ->		 0.7307680434222905 ->	Clase Predicha [1] : [1]
[0.488438278192814 0.5756109979633401 0.496551724137931 0.23122750899640143 0.6921664626682987 0.6638679791546034 0.49859418931583876 0.3988568588469185 0.6805921052631579 0.7883825944170771 0.07382526975287156