In [1]:
import os
import sys
import csv
import json
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import MyFunctions as MyFuncs

```TiTransitionsCSV()``` takes in a path eg ```'/Users/maggie/Desktop/DMU_project_test/'``` and creates ...

In [84]:
class TiTransitionsCSV():
    def __init__(self, root_path, rf, action):
        self.root_dir = root_path
        self.results_file = rf
        self.action = action
        
    def mylistdir(self, directory, bit='', end=True):
        filelist = os.listdir(directory)
        if end:
            return [x for x in filelist if x.endswith(f'{bit}') and not x.startswith('.') and not 'Icon' in x]
        else:
             return [x for x in filelist if x.startswith(f'{bit}') and not x.startswith('.') and not 'Icon' in x]

    def make_storage_directory(self, target_dir):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        return target_dir        
          
        
    def read_T(self, file_p):
        df = pd.read_csv(file_p)
        columns = ['Ti', 'Ti_prime', 'To']
        for col in columns:
            df[col] = df[col].round()
        df['To_prime'] = df['To'].shift(-1)
        df = df.drop(df.tail(1).index)
        return df

    
    def get_dist(self, df):
        minTi, maxTi = df['Ti'].min(), df['Ti'].max()
        minTo, maxTo = df['To'].min(), df['To'].max()
        allTi = [t for t in range(int(minTi), int(maxTi)+1)]
        allTo = [t for t in range(int(minTo), int(maxTo)+1)]

        all_pairs = [(x,y) for x in allTo for y in allTi]
        pair_inds = {x:i for i, x in enumerate(all_pairs)}
        Ti_inds = {x:int(x-minTi) for x in allTi}
        
#         print(len(allTi), len(allTo))
#         print(len(pair_inds))
#         print(len(Ti_inds))
        
#         self.write_inds(pair_inds, 'ToTi', 'index,temp out,temp in')
#         self.write_inds(Ti_inds, 'Ti', 'index,temp in')

        transT = np.zeros((len(allTi)*len(allTo), len(allTi)))
        
        df = df[df.a==self.action]
        
        
        for index, row in df.iterrows():
            i = pair_inds[(int(row.To), int(row.Ti))]
            j = Ti_inds[(int(row.Ti_prime))]            
            transT[i,j] += 1
        transT = transT/transT.sum()
        
        ToTi_Ti = pd.DataFrame(data=transT, index=pair_inds.keys(), columns=Ti_inds.keys())
        df_ext = ToTi_Ti
        
        for i in range(1, len(allTo)):
            df_ext = pd.concat([df_ext, ToTi_Ti], axis=1, sort=False)
        df_ext.index = [df_ext.index.map('({0[0]},{0[1]})'.format)]
        
        return df_ext


    def write_inds(self, inds_dict, fname, header):
        csv_file = os.path.join(self.root_dir,  f'{fname}-inds-{self.rf_num}.csv')
        np.savetxt(csv_file, np.column_stack(([v for v in inds_dict.values()], [k for k in inds_dict.keys()])), 
                   fmt='%10.1f', delimiter=',', header=header, comments='')

        
#     def write_csv(self, df):    
#         T_store = self.make_storage_directory(os.path.join(self.root_dir, 'TransitionProbabilties'))
#         csv_file = os.path.join(T_store, f'To_Ti-{self.rf_num}.csv')
#         np.savetxt(csv_file, transitionT, fmt='%10.5f', delimiter=',')


            
    def main(self):
        self.rf_num = self.results_file.split('_')[1].strip('.csv')
        file_path = os.path.join(self.root_dir, 'data_from_simulink', self.results_file)
        self.df = self.read_T(file_path)
        self.TransMatrix = self.get_dist(self.df)
#             self.write_csv(T)

In [88]:
root_dir = '/Users/maggie/Desktop/DMU_project_test/'
results_file = 'resultFile_1.csv'
actions = [0, 6000, 10000]

for a in actions:
    t = TiTransitionsCSV(root_dir, results_file, a)
    t.main()
# t.TransMatrix
# t.TransMatrix.to_csv('/Users/maggie/Desktop/testcsv.csv')



Index(['Ti', 'Ti_prime', 'To', 'a', 'To_prime'], dtype='object')
6709
1676
Index(['Ti', 'Ti_prime', 'To', 'a', 'To_prime'], dtype='object')
6709
2237
Index(['Ti', 'Ti_prime', 'To', 'a', 'To_prime'], dtype='object')
6709
2796


In [115]:
class ToTransitionsCSV():
    def __init__(self, r_path, w_name, h):
        self.root_dir = r_path
        self.weather_file = os.path.join(self.root_dir, w_name)
        self.hour = h
        
    def read_T(self, file_p):
        df = pd.read_csv(file_p)
        df.Temperature_degC = df.Temperature_degC.round()
        df.Hour = pd.to_datetime(df.Hour).dt.strftime('%H:%M')
        return df
    
    def ind(self, t):
        return int(t-self.minT)
    
    def get_dist(self, df):
        self.all_H = df.Hour.unique()
        self.minT = df['Temperature_degC'].min()
        self.maxT = df['Temperature_degC'].max()
        allT = [t for t in range(int(self.minT), int(self.maxT)+1)]
        self.write_allT(allT)
        df['Tprime'] = df['Temperature_degC'].shift(-1)
        df = df.drop(df.tail(1).index)
        
#         Temp_T = {}
#         for h in all_H:

        df = df[df.Hour == self.hour]
        transT = np.zeros((len(allT), len(allT)))
        for index, row in df.iterrows():
            i, j = self.ind(row['Temperature_degC']), self.ind(row['Tprime'])
            transT[i,j] += 1
            transT = transT/transT.sum()
            To_To = pd.DataFrame(data=transT, index=allT, columns=allT)
            
#             Temp_T[h] = To_To
        return To_To
        
    def write_allT(self, allT):
        T_ind = [self.ind(t) for t in allT]
        csv_file = os.path.join(self.root_dir, f'temperature_indices.csv')
        np.savetxt(csv_file, np.column_stack((T_ind, allT)), fmt='%10.1f', delimiter=',', header='index,temperature', comments='')
    
    def write_csv(self, dist_dict):
        for h in dist_dict:       
            csv_file = os.path.join(self.root_dir, 'TransitionProbabilties', 'outdoorT', f'To_{h[0:2]}.csv')
            np.savetxt(csv_file, dist_dict[h], fmt='%10.5f', delimiter=',')

            
    def main(self):
        df = self.read_T(self.weather_file)
        self.T = self.get_dist(df)
#         self.write_csv(T)


In [118]:
#Toutdoor transitions
# write_path = '/Users/maggie/Documents/Github/DMU-Final-Project/'

root_dir = '/Users/maggie/Desktop/DMU_project_test/'
weather_file = 'boulder_hourlyTemps_jan.csv'



# actions = [0, 6000, 10000]

# for a in actions:

hours = [str(x).zfill(2) + ':00' for x in range(0,24)]
for h in hours: 
    t = ToTransitionsCSV(root_dir, weather_file, h)
    t.main()
    print(t.T)


# for k in t.T:
#     print(k)
#     print(t.T[k])

print(hours)

     -23  -22           -21           -20  -19  -18  -17           -16  -15  \
-23  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-22  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-21  0.0  0.0  1.490116e-08  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-20  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-19  0.0  0.0  0.000000e+00  5.960464e-08  0.0  0.0  0.0  0.000000e+00  0.0   
-18  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-17  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-16  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  1.192093e-07  0.0   
-15  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-14  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-13  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0  0.0  0.000000e+00  0.0   
-12  0.0  0.0  0.000000e+00  0.000000e+00  0.0  0.0 

In [113]:
# H = [f'{x2.0f}:00' for x in range(0,24)]
# # {occ_probs[hour][0]:.4f}
# # [flags][width][.precision]type 

# {:.2f}

hours = [str(x).zfill(2) + ':00' for x in range(0,24)]
print(hours)

['00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00', '08:00', '09:00', '10:00', '11:00', '12:00', '13:00', '14:00', '15:00', '16:00', '17:00', '18:00', '19:00', '20:00', '21:00', '22:00', '23:00']


In [49]:
class HomeData():
    def __init__(self, path, write):
        self.root_dir = path
        self.write_dir = write
        self.home = path.split('/')[-1].split('-')[-2]
        self.system = path.split('/')[-1].split('-')[-1]
    
    
    def mylistdir(self, directory, bit='', end=True):
        filelist = os.listdir(directory)
        if end:
            return [x for x in filelist if x.endswith(f'{bit}') and not x.startswith('.') and not 'Icon' in x]
        else:
             return [x for x in filelist if x.startswith(f'{bit}') and not x.startswith('.') and not 'Icon' in x]

    def make_storage_directory(self, target_dir):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        return target_dir


# reads in occupancy files and creates dfs for all occupants
class HomeOccupancy(HomeData):
    def __init__(self, path, write):     
        HomeData.__init__(self, path, write) 
        self.ground_path = os.path.join(self.root_dir, 'GroundTruth')
        self.occupant_names = []
        self.average_length = 60
        self.start_date = '2019-02-04'
        self.end_date = '2019-04-08'

    def get_ground_truth(self):
        occupant_files = self.mylistdir(self.ground_path, '.csv')
        occupants = {}
        enter_times, exit_times = [], []
        
        for occ in occupant_files:
            occupant_name = occ.strip('.csv').split('-')[1]
            self.occupant_names.append(occupant_name)
            ishome = []
            with open(os.path.join(self.ground_path, occ)) as csv_file:
                csv_reader, line_count = csv.reader(csv_file, delimiter=','), 0
                for row in csv_reader:
                    status, when = row[1], row[2].split('at')
                    dt_day = datetime.strptime(str(when[0] + when[1]), '%B %d, %Y  %I:%M%p')
                    ishome.append((status, dt_day))
                    if line_count == 0:
                        enter_times.append(dt_day)
                    line_count += 1
                exit_times.append(dt_day)
                
            occupants[occupant_name] = ishome        
        self.first_last = (sorted(enter_times)[0], sorted(exit_times)[-1])
        print(self.occupant_names)
        return occupants
    
    def create_occupancy_df(self, occupants, frequency):
        occ_range = pd.date_range(start=self.first_last[0], end=self.first_last[1], freq=frequency)    
        occ_df = pd.DataFrame(index=occ_range)
        
        for occ in occupants:
            occ_df[occ] = 99
            s1 = 'exited'
            for r in occupants[occ]:
                date = r[1]
                s2 = r[0]                
                occ_df.loc[(occ_df.index < date) & (occ_df[occ]==99) & (s1 == 'exited') & (s2 == 'entered'), occ] =  0
                occ_df.loc[(occ_df.index < date) & (occ_df[occ]==99) & (s1 == 'entered') & (s2 == 'exited'), occ] =  1
                s1 = s2               
            occ_df.loc[(occ_df.index >= date) & (occ_df[occ] == 99) & (s1 == 'entered'), occ] = 1
            occ_df.loc[(occ_df.index >= date) & (occ_df[occ] == 99) & (s1 == 'exited'), occ] = 0
        
        occ_df['day'] = occ_df.index.weekday
        occ_df['weekend'] = 1
        occ_df['day_name'] = occ_df.index.day_name()
        occ_df.loc[occ_df.day < 5, 'weekend'] = 0 
        return (occ_df)
    
    def average_df(self, df):
        time_series = []
        for group, df_chunk in df.groupby(np.arange(len(df))//self.average_length):
            df_max = df_chunk.max()
            df_index = df_chunk.iloc[-1]
            time_series.append(df_index.name)
            df_summary = df_max.to_frame().transpose() 
            new_df = df_summary if group == 0 else pd.concat([new_df, df_summary])
        new_df.index = time_series  
        return new_df
          
    def write_occupancy_csv(self, df, fname):#, write_dir):   
        target_dir = self.make_storage_directory(os.path.join(self.write_dir, 'Occupancy_CSVs'))
        fname = os.path.join(target_dir, fname)
        df.to_csv(fname, index = True)
        print(fname + ': Write Successful!')
            
    def main(self):
        self.occupant_status = self.get_ground_truth()  
        df_hr = self.create_occupancy_df(self.occupant_status, frequency='1H')
        self.df_hr = df_hr.loc[(df_hr.index >= self.start_date) & (df_hr.index < self.end_date)] 
        self.write_occupancy_csv(self.df_hr, f'{self.home}-{self.system}-Occupancy_df.csv')#, self.write_dir)

# takes dfs for all occupants are create probabilites for transitions into/out of occupancy states for one person
class GetProbs(HomeData):
    def __init__(self, df, name, write):
        self.name = name
        self.all_H = [x for x in range(0,24)]
        self.df = df
        self.write_dir = write

    def get_occ(self, df):    
        hours_occ = {}
        hours_unocc = {}
        for h in self.all_H:
            occ_to_un = len(df.loc[(df.index.hour == h) & (df.diff(periods=-1).index.hour==h) & (df.diff(periods=-1)==1)])            # ~A|B
            same_occ  = len(df.loc[(df.index.hour == h) & (df.diff(periods=-1).index.hour==h) & (df.diff(periods=-1)==0)& (df==1)])   #  A|B
            
            un_to_occ = len(df.loc[(df.index.hour == h) & (df.diff(periods=-1).index.hour==h) & (df.diff(periods=-1)==-1)])           #  A|~B
            same_un   = len(df.loc[(df.index.hour == h) & (df.diff(periods=-1).index.hour==h) & (df.diff(periods=-1)==0)& (df==0)])   # ~A|~B
            
            t_occ = occ_to_un+same_occ
            t_un = un_to_occ+same_un
            
            p_leave = occ_to_un/t_occ if t_occ > 0 else 0.0
            p_arrive = un_to_occ/t_un if t_un > 0 else 0.0
            hours_occ[h] = (1-p_leave, p_leave)      # if home: (probability stay home, probability leave)
            hours_unocc[h] = (p_arrive, 1-p_arrive)  # if gone: (probability arrive, probability stay out)

        return hours_occ, hours_unocc

    def write_occ(self, occ_probs, fname, cols):
        store_dir = self.make_storage_directory(os.path.join(self.write_dir, 'TransitionProbabilties'))
        csv_file = os.path.join(store_dir, fname)
        with open(csv_file, 'w') as f:
            f.write(f'{cols[0]}, {cols[1]}, {cols[2]}'+'\n')
            for hour in occ_probs:
                f.write(f'{hour}:00, {occ_probs[hour][0]:.4f}, {occ_probs[hour][1]:.4f}\n')        
        print(csv_file + ': Write Successful!')


    def main(self):
        self.Home, self.notHome = self.get_occ(self.df[self.name].loc[self.df.weekend==0])
#         self.write_occ(Home, f'{name}-given-occupied.csv', ['hour', 'prob leave', 'prob no leave'])
#         self.write_occ(notHome, f'{name}-given-unoccupied.csv', ['hour', 'prob arrive', 'prob no arrive'])


# creates plot of occupancy for one person
class PlotOcc(HomeData):
    def __init__(self, df, name, write, D=21):
        self.df = df
        self.days = D
        self.name = name
        self.write_dir = write
    
    def write_occ_asPD(self, df):
        df['Date'] = pd.to_datetime(df.index)         
        df = df.set_index('Date')
        dfs = []
        day1 = df.index[0]
        day_start = df.day[0]
        num_weeks = int(np.ceil(len(df)/(24*self.days)))
        print('{} time periods of {} days'.format(num_weeks, self.days))
        
        if day_start > 0:
            print(int(24*(7-day_start)))
            day1 = day1 + timedelta(days = int(self.days-day_start))
            df1 = df.loc[(df.index <= day1)]
            dfs.append(df1)
        
        for i in range(num_weeks):
            dayf = day1 + timedelta(days = self.days*i)
            dayn = day1 + timedelta(days = self.days*(i+1))
            print(dayf, dayn)
            dfn = df.loc[(df.index >= dayf) & (df.index < dayn)]
            dfs.append(dfn)
        return dfs

    def highlight_weekend(self, D, df, ax):
        for i in range(int(self.days/7)):
            start = df[(df.weekend > 0)].index[48*i]
            end = start + timedelta(days=2)
            ax.axvspan(start, end, facecolor='pink', edgecolor='none', alpha=0.6)
        return ax

    def plot_occ_all(self, dfs, n=5, scale=1.2, height=4):
        for x, df in enumerate(dfs[0:n]):
            L = np.floor(len(df)/24)*scale
            ax = df.plot(y=name, title = self.name, figsize = (L,height), legend=False)
            ax = self.highlight_weekend(self.days, df, ax)
            save_dir = self.make_storage_directory(os.path.join(self.write_dir, 'Occ_Figs'))
            plt.savefig(os.path.join(save_dir, f'{self.name}_{x}.png'))       

    def main(self):
        dfs = self.write_occ_asPD(self.df)
        self.plot_occ_all(dfs)

In [50]:
#Occupancy
write_path =  '/Users/maggie/Desktop/DMU_project_test/'
root_path = '/Users/maggie/Desktop/HPD_mobile_data/HPD-env-summaries/HPD_mobile-H1/H1-black'
name = 'Will'

# if len(sys.argv) > 1:
#     root_path = sys.argv[1]
#     name = sys.argv[2]
# else:
#     root_path = '/Users/maggie/Desktop/HPD_mobile_data/HPD-env-summaries/HPD_mobile-H1/H1-black'
#     name = 'Will'

print(f'Getting occupancy data from {root_path} ...')
H1_occ = HomeOccupancy(root_path, write_path)
H1_occ.main()
print(f'Full occupancy df created!')
full_df = H1_occ.df_hr

print(f'Getting probabilties for {name} ...')
H1_p1_probs = GetProbs(full_df, name, write_path)
H1_p1_probs.main()

# print(f'Saving figures for {name} ...')
# p = PlotOcc(full_df, name, write_path)
# p.main()

Getting occupancy data from /Users/maggie/Desktop/HPD_mobile_data/HPD-env-summaries/HPD_mobile-H1/H1-black ...
['Will', 'Maggie', 'Sade', 'Kurt']
/Users/maggie/Desktop/DMU_project_test/Occupancy_CSVs/H1-black-Occupancy_df.csv: Write Successful!
Full occupancy df created!
Getting probabilties for Will ...


In [51]:
print(H1_p1_probs.Home)

{0: (1.0, 0.0), 1: (1.0, 0.0), 2: (1.0, 0.0), 3: (1.0, 0.0), 4: (1.0, 0.0), 5: (1.0, 0.0), 6: (1.0, 0.0), 7: (0.8717948717948718, 0.1282051282051282), 8: (0.5294117647058824, 0.47058823529411764), 9: (0.38888888888888884, 0.6111111111111112), 10: (0.25, 0.75), 11: (0.5, 0.5), 12: (1.0, 0.0), 13: (1.0, 0.0), 14: (0.6666666666666667, 0.3333333333333333), 15: (1.0, 0.0), 16: (1.0, 0.0), 17: (1.0, 0.0), 18: (0.7142857142857143, 0.2857142857142857), 19: (0.7142857142857143, 0.2857142857142857), 20: (1.0, 0.0), 21: (0.9047619047619048, 0.09523809523809523), 22: (0.9285714285714286, 0.07142857142857142), 23: (1.0, 0.0)}


In [25]:
toy1 = pd.DataFrame(np.array([[1, 2], [2,1], [3,3], [3,1], [2,2], [1,3]]), columns=['t1p', 't2p'], index=['T1t1','T1t2', 'T2t1', 'T2t2', 'T3t1', 'T3t2' ])
toy2 = pd.DataFrame(np.array([[1, 2, 3], [2,1, 1], [3,3, 3]]), columns=['T1p', 'T2p', 'T3p'], index=['T1', 'T2', 'T3' ])

# print(toy1)
# print(toy2)

cols_orig = [x for x in toy2.columns]
new = np.repeat(cols_orig, 2)


toy1_dup = toy1
for i in range(1, 3):
    toy1_dup = pd.concat([toy1_dup, toy1], axis=1, sort=False)


for col in toy2.columns:
    for i in reversed(range(1, len(toy1.columns))):
        toy2.insert(toy2.columns.get_loc(col), f'{col}-{i}', toy2[col])       
toy2 = toy2.iloc[np.arange(len(toy2)).repeat(2)]
toy2.columns = new

# print(toy1_dup)
# print(toy2)




new_col = [f'{x}_{y}' for x,y in zip(toy1_dup, toy2)]
new_ind = [f'{x}_{y}' for x,y in zip(toy1_dup.index, toy2.index)]
# print(new_ind)

toy1_dup.columns = new_col
toy2.columns = new_col
# toy2.insert(loc=0, column=, value=new_col)

toy1_dup['ind'] = new_ind
toy1_dup = toy1_dup.set_index('ind')

toy2['ind'] = new_ind
toy2 = toy2.set_index('ind')



# print(toy1_dup)
# print(toy2)

# print(toy2_dup)
# toy2_dup2 = pd.DataFrame(np.repeat(toy2_dup.values,2,axis=0))
# toy2_dup2.columns = toy2_dup.columns
# print(toy2_dup2)


# print(toy1)


# print(toy1_dup)
# print("extended matrices")    
# # # print(toy3)
t3 = toy1_dup*toy2
print(t3)

occ = pd.DataFrame(np.array([[0.1282, 0.8718], [0, 1]]), columns=['Home', 'NotHome'], index=['Home', 'NotHome'])
print(occ)

In [45]:
H1_p1_probs.Home[0]
hours = [str(x).zfill(2) + ':00' for x in range(0,24)]
all_h = [int(h[0:2]) for h in hours]
print(all_h)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]


In [61]:
# hours_occ[h] = (1-p_leave, p_leave)      # if home: (probability stay home, probability leave)
# hours_unocc[h] = (p_arrive, 1-p_arrive)  # if gone: (probability arrive, probability stay out)


inds = t3.index
# print(H1_p1_probs.Home)
# print(H1_p1_probs.notHome)
hours = [str(x).zfill(2) + ':00' for x in range(0,24)]
all_h = [int(h[0:2]) for h in hours]



for h in all_h:
    home_home, home_away = H1_p1_probs.Home[h][0], H1_p1_probs.Home[h][1]
    away_home, away_away = H1_p1_probs.notHome[h][0], H1_p1_probs.notHome[h][1]
    
    df_HH = t3*home_home
    df_HA = t3*home_away
    df_givenH = pd.concat([df_HH, df_HA], axis = 1)

    df_AH = t3*away_home
    df_AA = t3*away_away
    df_givenA = pd.concat([df_AH, df_AA], axis = 1)
    
    full_df = pd.concat([df_givenH, df_givenA], axis = 0 )
    break
    
full_df.isna().sum().sum()




#     home_nothome = H1_p1_probs.Home[h][1]
#     nothome_nothome = H1_p1_probs.notHome[h][1]


    
    
    
# for index, row in occ.iterrows():
#     df_ind = pd.DataFrame(index=inds)
#     for col in occ.columns:
#         new_df = row[col]*t3
#         df_ind = pd.concat([df_ind, new_df], axis=1)
#     fin_df = df_ind
#     col_df = pd.DataFrame(columns=fin_df.columns)
#     col_df = pd.concat([col_df, fin_df])
# #     cols = df.columns
    
# print(col_df)

0

In [102]:
np_df = full_df.to_numpy()

n = [np_df, np_df]
new_n = np.asarray(n)
print(len(new_n))
print(len(new_n[0][0]))


l = [[1,3,5], [1,6,7], [7,2,1], [1,3,5], [1,2,1]]
l_np = np.asarray(l)
print(l_np)
np_l = l_np.transpose()
print(np_l)


df1 = pd.DataFrame({'col11' : [1.0] * 5, 
                   'col12' : [2.0] * 5, 
                   'col13' : [3.0] * 5 }, index = range(1,6),)
df.values
l.append([1,1,1])
l = []
l.append(df1.to_numpy())
l.append(df1.to_numpy())

In [133]:
# print((np_l.max()))
print((l_np))
print(len(l_np))
print(len(l_np[0]))

l = [[1,3,5], [1,6,7], [7,2,1], [1,3,5], [1,2,1]]
l_np = np.asarray(l)
print(l_np)
print(len(l_np))

[[1 3 5]
 [1 6 7]
 [7 2 1]
 [1 3 5]
 [1 2 1]]
5
3
[[1 3 5]
 [1 6 7]
 [7 2 1]
 [1 3 5]
 [1 2 1]]
5


In [205]:
df = pd.DataFrame({'col1' : [1.0] * 5, 
                   'col2' : [2.0] * 5, 
                   'col3' : [3.0] * 5 }, index = range(1,6),)

df1 = pd.DataFrame({'col11' : [1.0] * 5, 
                   'col12' : [2.0] * 5, 
                   'col13' : [3.0] * 5 }, index = range(1,6),)


df2 = pd.DataFrame({'col1' : [10.0] * 5, 
                    'col2' : [100.0] * 5, 
                    'col3' : [1000.0] * 5 }, index = range(1,6),)


# d3 = df*df2
# print(d3)
# print(df1)
# df1.sum(axis=1)

new = df1.div(df1.sum(axis=1), axis=0)

# print(len(new.sum(axis=1)))
# df = pd.concat([df, df1])
# print(df)

for index, row in df1.iterrows():
    if row.sum() != 0:
        curr_row = row
    else:
        print('not')
    print((curr_row.sum()))

6.0
6.0
6.0
6.0
6.0


In [None]:
for df

In [321]:
numpy_df = df.to_numpy()
numpy_nans = numpy_df.sum()

print(type(numpy_nans))

if np.isnan(numpy_df.sum()):
    print('oh no! Nans in df')
else:
    print('good to go')
    
np.isnan(numpy_nans)

<class 'numpy.float64'>
good to go


False

In [297]:

idim = 3
jdim = 2
n = np.zeros((idim*jdim,idim))
print(n)
# n[1,2] +=1/0
# print(n)
# print(n.sum())

dfn = pd.DataFrame(data=n)
print(dfn)
dfn = dfn.div(dfn.sum(axis=1), axis=0).fillna(value=0.0)
print(dfn)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
     0    1    2
0  0.0  0.0  0.0
1  0.0  0.0  0.0
2  0.0  0.0  0.0
3  0.0  0.0  0.0
4  0.0  0.0  0.0
5  0.0  0.0  0.0
     0    1    2
0  0.0  0.0  0.0
1  0.0  0.0  0.0
2  0.0  0.0  0.0
3  0.0  0.0  0.0
4  0.0  0.0  0.0
5  0.0  0.0  0.0


In [342]:
from __future__ import division
import numpy as np
from scipy import sparse as sp

npvec = np.tile( [0,0,0,0,1.], 20 )
Acsr = sp.csr_matrix(npvec)
Acoo = Acsr.tocoo()


print(type(npvec))
print(type(Acsr))
print(type(Acoo))

x = np.dot(npvec,  Acsr)
# y = npvec*Acsr
print(y.size)
print(x.size)

# for A in (Acsr, Acoo, npvec):
#     print("\n%s" % type(A))
#     for B in (Acsr, Acoo, npvec):
#         print ("+ %s = " % type(B) )
#         try:
#             AplusB = A + B
#             print( type(AplusB))
#         except StandardError, errmsg:
#             print("Error", errmsg)

<class 'numpy.ndarray'>
<class 'scipy.sparse.csr.csr_matrix'>
<class 'scipy.sparse.coo.coo_matrix'>
1
100


In [378]:
cols1 = [i for i in range(-5, 5)]
cols2 = [j for j in range(3,6)]

new_col = [f'({x},{y})' for x in cols1 for y in cols2]
# print(cols1)
# print(new_col)

new_cols1 = []
for col in new_col:
    new_cols1.append(col.strip(')') + ',1)')
print(len(new_cols1))
    
new_cols2 = []
for col in new_col:
    new_cols2.append(col.strip(')') + ',0)')

all_new = new_cols1 + new_cols2
print(len(all_new))

ind_list = [f'{i+1},{x.strip("()")}' for i, x in enumerate(all_new) ]
ind_list = [x.split(',') for x in ind_list]
np.savetxt('/Users/maggie/Desktop/index_test_save.txt', ind_list, fmt='%s', delimiter='\t')
print(ind_list)

# for i, x in enumerate(all_new):
#     print(i)





30
60
[['1', '-5', '3', '1'], ['2', '-5', '4', '1'], ['3', '-5', '5', '1'], ['4', '-4', '3', '1'], ['5', '-4', '4', '1'], ['6', '-4', '5', '1'], ['7', '-3', '3', '1'], ['8', '-3', '4', '1'], ['9', '-3', '5', '1'], ['10', '-2', '3', '1'], ['11', '-2', '4', '1'], ['12', '-2', '5', '1'], ['13', '-1', '3', '1'], ['14', '-1', '4', '1'], ['15', '-1', '5', '1'], ['16', '0', '3', '1'], ['17', '0', '4', '1'], ['18', '0', '5', '1'], ['19', '1', '3', '1'], ['20', '1', '4', '1'], ['21', '1', '5', '1'], ['22', '2', '3', '1'], ['23', '2', '4', '1'], ['24', '2', '5', '1'], ['25', '3', '3', '1'], ['26', '3', '4', '1'], ['27', '3', '5', '1'], ['28', '4', '3', '1'], ['29', '4', '4', '1'], ['30', '4', '5', '1'], ['31', '-5', '3', '0'], ['32', '-5', '4', '0'], ['33', '-5', '5', '0'], ['34', '-4', '3', '0'], ['35', '-4', '4', '0'], ['36', '-4', '5', '0'], ['37', '-3', '3', '0'], ['38', '-3', '4', '0'], ['39', '-3', '5', '0'], ['40', '-2', '3', '0'], ['41', '-2', '4', '0'], ['42', '-2', '5', '0'], ['43', '-