In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import pandas as pd
import random as rd

In [345]:

#encoding: utf-8

##
## cartpole.py
## Gaetan JUVIN 06/24/2017
##

import gym
import random
import os
import numpy as np
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam

class Agent():
    def __init__(self, state_size, action_size):
        self.weight_backup      = "cartpole_weight.h5"
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=2000)
        self.learning_rate      = 0.001
        self.gamma              = 0.95
        self.exploration_rate   = 1.0
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995
        self.brain              = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        return model

    def save_model(self):
            self.brain.save(self.weight_backup)

    def act(self, state):
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
        if len(self.memory) < sample_batch_size:
            return
        sample_batch = random.sample(self.memory, sample_batch_size)
        for state, action, reward, next_state, done in sample_batch:
            target = reward
            if not done:
              target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            target_f = self.brain.predict(state)
            target_f[0][action] = target
            self.brain.fit(state, target_f, epochs=1, verbose=0)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

class CartPole:
    def __init__(self):
        self.sample_batch_size = 32
        self.episodes          = 10000
        self.env               = gym.make('CartPole-v1')

        self.state_size        = self.env.observation_space.shape[0]
        self.action_size       = self.env.action_space.n
        self.agent             = Agent(self.state_size, self.action_size)


    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                state = np.reshape(state, [1, self.state_size])

                done = False
                index = 0
                while not done:
                    #self.env.render()

                    action = self.agent.act(state)

                    next_state, reward, done, _ = self.env.step(action)
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    index += 1
                print("Episode {}# Score: {}".format(index_episode, index + 1))
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()

if __name__ == "__main__":
    cartpole = CartPole()
    cartpole.run()


Episode 0# Score: 501
Episode 1# Score: 129
Episode 2# Score: 153


KeyboardInterrupt: 

In [8]:
x = np.array([1, 2, 3, 4, 5])
y = x * 2 + 1

# 학습 구조 모델링
model = Sequential()
model.add(Dense(1, input_shape=(1,)))
model.compile('SGD', 'mse')

# 학습 수행
model.fit(x, y, epochs=1000, verbose=0) #verbose 학습진행여부

# 평가
print('y:', y, ',predict:', model.predict(x).flatten())


y: [ 3  5  7  9 11] ,predict: [ 2.9876935  4.9924083  6.997123   9.001837  11.006552 ]


In [3]:
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym.envs.registration import register

register(
    id='FrozenLake-v3',
    entry_point = 'gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name':'4x4',
           'is_slippery':False}
)

env = gym.make('FrozenLake-v3')

# Q Table을 모두 0으로 초기화 한다. : 2차원 (number of state, action space) = (16,4)
Q = np.zeros([env.observation_space.n, env.action_space.n])

# discount 정의 => 미래의 reward를 현재의 reward 보다 조금 낮게 본다.
dis = 0.99

# 몇 번 시도를 할 것인가 (에피소드)
num_episodes = 2000

# 에피소드마다 총 리워드의 합을 저장하는 리스트
rList = []


for i in range(num_episodes) : 
    state = env.reset()
    rAll = 0
    done = False
    
    # Q learning 알고리즘
    while not done : 
        # Action 중에 가장 R(Reward)이 큰 Action을 고른다. 
        # 이 때, random noise 방식으로 decaying Exploit & Exploration 구현 
        action = np.argmax(Q[state, :] + np.random.randn(1, env.action_space.n) / (i+1))
        
        # 해당 Action을 했을 때 environment가 변하고, 새로운 state, reward, done 여부를 반환 받음
        new_state, reward, done, _ = env.step(action)
        
        # Q = R + Q 
        Q[state, action] = reward + dis * np.max(Q[new_state, :])
        
        rAll += reward
        state = new_state
        
    rList.append(rAll)

for i in range(num_episodes) : 
    state = env.reset()
    rAll = 0
    done = False
    
    # exploration의 확률 (decaying)
    e = 1./((i / 100) + 1)
    
    # Q learning 알고리즘
    while not done : 
        
        # E-Greedy 알고리즘으로 action 고르기
        if np.random.rand(1) < e :
            action = env.action_space.sample()
        else : 
            action = np.argmax(Q[state, :])
        
        # 해당 Action을 했을 때 environment가 변하고, 새로운 state, reward, done 여부를 반환 받음
        new_state, reward, done, _ = env.step(action)
        
        # Q = R + Q 
        Q[state, action] = reward + dis * np.max(Q[new_state, :])
        
        rAll += reward
        state = new_state
        
    rList.append(rAll)

print("Success rate : "+str(sum(rList) / num_episodes))
print("Final Q-Table Values")
print(Q)

plt.bar(range(len(rList)), rList, color="blue")
plt.show()



Error: Cannot re-register id: FrozenLake-v3

In [342]:
import numpy as np
import random
state = [1,2,3]
state_size = 3
a=np.reshape(state, [1, state_size])
print(a[0][0])
print(random.randrange(self.action_size))

1
0


In [324]:
#x,y, ld
# 1~288
#j = 72, ld = 1 => 144+72   x = 6 , y = 12
k=random.randint(1,288)
print(k)
k=288
if k > 144:
    j = k-144
    ld=1
else:
    j= k
    ld =0

x = (j-1) // 12 +1
y = j-12*(x-1)
print(x,y,ld)


182
12 12 1


In [343]:
print("Episode {}# Score: {}".format(3,5))

Episode 3# Score: 5


In [351]:
a = [0.5,0.4]
print(a[0])

0.5


In [356]:
a = [(1,2,3),(4,5,6)]
for x,y,z in a:
    print(x,y,z)

1 2 3
4 5 6


In [None]:
from matplotlib import pyplot as plt

#encoding: utf-8


import lakiaro_to_learning as lk
import gym
import random
import os
import numpy as np
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam

class Agent():
    def __init__(self, state_size, action_size):
        self.weight_backup      = "circle.h5"
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=2000)
        self.learning_rate      = 0.001
        self.gamma              = 0.95
        self.exploration_rate   = 1.0
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995
        self.brain              = self._build_model()
        

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(32, input_dim=self.state_size, activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        return model

    def save_model(self):
            self.brain.save(self.weight_backup)

    def act(self, state):
        if np.random.rand() <= self.exploration_rate:
            #print(1)
            rtn = [(np.random.rand(),np.random.rand())]
            return (rtn)
        #print(2)
        act_values = self.brain.predict(state)
        #print(self.brain.predict(state))
        #return np.argmax(act_values[0])
        return act_values

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
        if len(self.memory) < sample_batch_size:
            return
        sample_batch = random.sample(self.memory, sample_batch_size)
        #print(sample_batch)
        for state, action, reward, next_state, done in sample_batch:
            print(state, action, reward, next_state, done)
            target = reward
            if not done:
                print('predict, nextstate',self.brain.predict(next_state))
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            target_f = self.brain.predict(state)
            print('target_f',target_f)
            #target_f[0]= target
            target_f[0] = target
            self.brain.fit(state, target_f, epochs=1, verbose=0)
            #fit(x,y,epochs,verbose)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay
class circle():
    
    def __init__(self):
        self.score = 0

    
    def reset(self):
        self.x=[]
        self.y=[]
        x=0
        y=0
        self.Done = False
        self.index = 0
        return x**2 +y**2
        
    def input_xy(self,xy):
        self.index += 1
        x= xy[0][0]
        y=xy[0][1]
        if x**2 +y**2 >= 1*0.9995:
            self.x.append(x)
            self.y.append(y)
            rwd = 1
        else:
            rwd = 0
        if self.index ==1000:
            self.Done = True
            
        return x**2 +y**2,rwd,self.Done
    
    def to_graph(self):
        plt.plot(self.x, self.y,'or')
        plt.show()
        print(self.score)
        
class one():
    def __init__(self):
        self.circle = circle()
        self.sample_batch_size = 32
        self.episodes          = 1
        self.state_size        = 1
        self.action_size       = 2
        self.agent             = Agent(self.state_size, self.action_size)


    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.circle.reset()
                state = np.reshape(state, [1, self.state_size])
                #print(state)
                done = False
                index = 0
                reward_a = 0
                #self.agent.act_lst_reset()
                while not done:
                    action = self.agent.act(state)
                    #print('action', action)
                    next_state, reward, done = self.circle.input_xy(action)
                    #print(next_state, reward, done)
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    index += 1
                    reward_a += reward
                print("Episode {}# Score: {}".format(index_episode, reward_a))
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()
        
    def train(self):
        

if __name__ == "__main__":
    cartpole = one()
    cartpole.run()


In [4]:
import numpy as np
import pandas as pd
x_train = np.random.random((1000, 1))
y_train = x_train * 2 + np.random.random((1000, 1)) / 3.0

x_test = np.random.random((100, 1))
y_test = x_test * 2 + np.random.random((100, 1)) / 3.0

In [29]:
data = np.full((12,12),95)
#data = np.zeros((self.board_size,self.board_size))
df = pd.DataFrame(data)
data2 = data.reshape(144)
print(data2)
lst = df.values.tolist()
print(lst)
y = np.array(lst)
y = y.reshape(144)
print(np.count_nonzero.all(y >= 95,  y < 99))
print(y.count(0))

[95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95]
[[95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95], [95, 95, 9

AttributeError: 'function' object has no attribute 'all'

In [33]:
import numpy as np
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam
import random
rtn = random.randint(1,1000)
rtn = np.to_categorical(rtn)
rtn = np.argmax(rtn)
print(rtn)

AttributeError: module 'numpy' has no attribute 'to_categorical'

In [1]:
import pandas as pd
import tensorflow as tf

In [2]:
csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/applied-dl/heart.csv')

Downloading data from https://storage.googleapis.com/applied-dl/heart.csv


Exception: URL fetch failure on https://storage.googleapis.com/applied-dl/heart.csv: 403 -- Forbidden