# In [None]:
import copy

import matplotlib.pyplot as plt
import numpy as np

from models.system_model import SystemModel
from models.dqn import DQN
# The star import stays last so that any name it exports keeps taking
# precedence exactly as before.
from config.parameters import *

def _save_line_plot(x_values, y_values, x_label, y_label, file_name):
    """Draw one curve on its own figure, save it to *file_name*, then show it."""
    # A dedicated figure per curve: on a non-interactive backend plt.show()
    # does not clear the current figure, so consecutive plt.plot() calls would
    # otherwise accumulate on the same axes and leak into the later PNG files.
    fig = plt.figure()
    plt.plot(x_values, y_values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.savefig(file_name)
    plt.show()
    plt.close(fig)


def main():
    """Train the DQN agent on the system model, then save and plot the results.

    Runs ``Episodes_number`` episodes of ``T`` time steps each. In every time
    step each UAV in turn builds a state from the channel gains, picks an
    action through ``agent.Choose_action``, applies it to the environment and
    receives the sum rate as reward (halved whenever the worst user's rate is
    below ``R_require``). The transition is stored with ``agent.remember`` and
    ``agent.train`` is called after every UAV step.

    Per-episode throughput, worst-user throughput, the per-slot data rate of
    the final episode, the final positions and the final-episode trajectories
    are written as ``.npy`` files; three PNG plots are saved and shown.

    Names such as ``NumberOfUAVs``, ``NumberOfUsers``, ``F_c``, ``NoisePower``,
    ``Bandwidth``, ``R_require``, ``MAXUserspeed`` and ``PMAXUserspeed`` are
    not defined in this file; presumably they come from the star import of
    ``config.parameters``.
    """
    Episodes_number = 300
    T = 120  # Service duration and time steps
    # Time steps at which the user association is recomputed (0, 40, 80).
    association_steps = frozenset(range(0, T, 40))
    env = SystemModel()
    agent = DQN()
    Epsilon = 0.9
    # Linear decay that reaches zero 50 episodes before the end of training.
    epsilon_decay = 0.9 / (Episodes_number - 50)
    last_episode = Episodes_number - 1

    datarate_seq = np.zeros(T)
    WorstuserRate_seq = np.zeros(T)
    Through_put_seq = np.zeros(Episodes_number)
    Worstuser_TP_seq = np.zeros(Episodes_number)
    UAV_trajectory = []
    User_trajectory = []

    for episode in range(Episodes_number):
        env.Reset_position()
        # Clamp at zero: without it the value keeps decreasing below zero
        # during the final 50 episodes.
        Epsilon = max(Epsilon - epsilon_decay, 0.0)
        p = 0  # number of steps in this episode where the rate requirement was missed

        for t in range(T):
            if t in association_steps:
                User_AS_List = agent.User_association(env.PositionOfUAVs, env.PositionOfUsers, NumberOfUAVs, NumberOfUsers)

            # Record trajectories only for the final episode.
            if episode == last_episode:
                UAV_trajectory.append(copy.deepcopy(env.PositionOfUAVs.values))
                User_trajectory.append(copy.deepcopy(env.PositionOfUsers.values))

            for UAV in range(NumberOfUAVs):
                # Calculate channel gains
                Distence_CG = env.Get_Distance_U2K(env.PositionOfUAVs, env.PositionOfUsers, NumberOfUAVs, NumberOfUsers)
                PL_for_CG = env.Get_Propergation_Loss(Distence_CG, env.PositionOfUAVs, NumberOfUAVs, NumberOfUsers, F_c)
                CG = env.Get_Channel_Gain_NOMA(NumberOfUAVs, NumberOfUsers, PL_for_CG, User_AS_List, NoisePower)
                # NOTE(review): same call and arguments as CG above. Kept as a
                # separate call in case the method is stochastic or has side
                # effects - confirm whether Eq_CG can simply reuse CG.
                Eq_CG = env.Get_Channel_Gain_NOMA(NumberOfUAVs, NumberOfUsers, PL_for_CG, User_AS_List, NoisePower)

                # Generate current state and choose action
                State = env.Create_state_Noposition(UAV, User_AS_List, CG)
                action_name = agent.Choose_action(State[0], Epsilon, UAV, User_AS_List)
                env.take_action_NOMA(action_name, UAV, User_AS_List, Eq_CG)

                # Calculate reward from the environment after the action
                Distence = env.Get_Distance_U2K(env.PositionOfUAVs, env.PositionOfUsers, NumberOfUAVs, NumberOfUsers)
                P_L = env.Get_Propergation_Loss(Distence, env.PositionOfUAVs, NumberOfUAVs, NumberOfUsers, F_c)
                SINR = env.Get_SINR_NNOMA(NumberOfUAVs, NumberOfUsers, P_L, User_AS_List, Eq_CG, NoisePower)
                DataRate, SumRate, WorstuserRate = env.Calcullate_Datarate(SINR, NumberOfUsers, Bandwidth)

                Reward = SumRate
                if WorstuserRate < R_require:
                    # Penalise steps where the worst user misses the required rate.
                    Reward = Reward / 2
                    p += 1

                # Get next state
                CG_next = env.Get_Channel_Gain_NOMA(NumberOfUAVs, NumberOfUsers, P_L, User_AS_List, NoisePower)
                Next_state = env.Create_state_Noposition(UAV, User_AS_List, CG_next)

                # Store experience; deep copies keep the stored transition
                # independent of objects the environment may mutate later.
                agent.remember(
                    copy.deepcopy(State[0]),
                    copy.deepcopy(action_name),
                    copy.deepcopy(Next_state[0]),
                    copy.deepcopy(Reward),
                    UAV,
                    User_AS_List,
                )
                agent.train()

                # NOTE(review): users are moved once per UAV step, i.e.
                # NumberOfUAVs times per time slot - confirm this is intended.
                env.User_randomMove(MAXUserspeed, NumberOfUsers)
                env.User_Purposive_Move_6(PMAXUserspeed)

                # The rates seen after the last UAV has acted are the ones
                # recorded for this time slot.
                if UAV == (NumberOfUAVs - 1):
                    datarate_seq[t] = SumRate
                    WorstuserRate_seq[t] = WorstuserRate

        Through_put = np.sum(datarate_seq)
        Worstuser_TP = np.sum(WorstuserRate_seq)
        Through_put_seq[episode] = Through_put
        Worstuser_TP_seq[episode] = Worstuser_TP

        print('Episode=', episode, 'Epsilon=', Epsilon, 'Punishment=', p, 'Through_put=', Through_put)

    # Save results
    np.save("Through_put_NOMA.npy", Through_put_seq)
    np.save("WorstUser_Through_put_NOMA.npy", Worstuser_TP_seq)
    np.save("Total Data Rate_NOMA.npy", datarate_seq)
    np.save("PositionOfUsers_end_NOMA.npy", env.PositionOfUsers)
    np.save("PositionOfUAVs_end_NOMA.npy", env.PositionOfUAVs)
    np.save('UAV_trajectory', UAV_trajectory)
    np.save('User_trajectory', User_trajectory)

    print("PositionOfUsers_end_NOMA.npy", env.PositionOfUsers)
    print("PositionOfUAVs_end_NOMA.npy", env.PositionOfUAVs)

    # Plot results
    x_axis = range(1, Episodes_number + 1)
    _save_line_plot(x_axis, Through_put_seq, 'Episodes', 'Throughput', 'Through_put_NOMA.png')
    _save_line_plot(x_axis, Worstuser_TP_seq, 'Episodes', 'Throughput of Worst User',
                    'WorstUser_Through_put_NOMA.png')
    _save_line_plot(range(0, T), datarate_seq, 'Time slots', 'Data Rate', 'Total Data Rate_NOMA.png')

# Script entry point: run the training loop only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()