# Chapter 1: Pandas Foundations

## Recipes
* [Dissecting the anatomy of a DataFrame](#Dissecting-the-anatomy-of-a-DataFrame)
* [Accessing the main DataFrame components](#Accessing-the-main-DataFrame-components)
* [Understanding data types](#Understanding-data-types)
* [Selecting a single column of data as a Series](#Selecting-a-single-column-of-data-as-a-Series)
* [Calling Series methods](#Calling-Series-methods)
* [Working with operators on a Series](#Working-with-operators-on-a-Series)
* [Chaining Series methods together](#Chaining-Series-methods-together)
* [Making the index meaningful](#Making-the-index-meaningful)
* [Renaming row and column names](#Renaming-row-and-column-names)
* [Creating and deleting columns](#Creating-and-deleting-columns)

In [1]:
import pandas as pd
import numpy as np

# Dissecting the anatomy of a DataFrame

#### Change options to get specific output for book

In [2]:
# pd.set_option('max_columns', 8, 'max_rows', 10)
pd.set_option('display.max_columns', 8)
pd.set_option('display.max_rows', 21)

In [3]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
signals.head()

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,8,11.87,2505.44,3553.24
2020078,10,EURSpecial,30,823,...,4,6.09,53.3,342.76
2105300,6,Arnold,30,1,...,9,16.19,802.9,329.8
2049959,9,MAX NoLimit,50,692,...,7,5.81,753.57,2483.64
2145152,12,PabloFX Safe,30,677,...,1,2.93,14.1,500.97


![dataframe anatomy](../images/ch01_dataframe_anatomy.png)

# Accessing the main DataFrame components

In [4]:
columns = signals.columns
index = signals.index
data = signals.values

In [5]:
columns

Index(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins', 'Maximal consecutive profit',
       'Sharpe Ratio', 'Trading activity', 'Max deposit load', 'Latest trade',
       'Trades per week', 'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff', 'Average Profit',
       'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown', 'Maximal balance drawdown',
       'Relative drawdown by balance', 'Relative drawdown by equity', 'Months',
       'Average By Month', 'Std By Month', 'Profit Trades Pct',
       'Loss Trades Pct', 'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount', 'Maximal consecutive prof

In [6]:
index

Index([1371953, 2020078, 2105300, 2049959, 2145152, 1463352, 2084523, 2195366,
       2115303, 2009468,
       ...
       2044448, 2192054, 1722005, 1808747, 2216572, 2211292, 2207077, 1566301,
       2158945, 1614162],
      dtype='int64', name='Signal', length=4313)

In [7]:
data

array([['1', 'Activity', 30, ..., 11.87, 2505.44, 3553.24],
       ['10', 'EURSpecial', 30, ..., 6.09, 53.3, 342.76],
       ['6', 'Arnold', 30, ..., 16.19, 802.9, 329.8],
       ...,
       ['5688', 'High Risk', 30, ..., 39.86, 1539.94, 8806.67],
       ['5693', '2', 30, ..., 92.19, 1081.51, 210.08],
       ['5720', '345', 30, ..., 59.0, 15659.99, 4213.67]], dtype=object)

In [8]:
type(index)

pandas.core.indexes.base.Index

In [9]:
type(columns)

pandas.core.indexes.base.Index

In [10]:
type(data)

numpy.ndarray

In [11]:
issubclass(pd.RangeIndex, pd.Index)

True

## There's more

In [12]:
index.values

array([1371953, 2020078, 2105300, ..., 1566301, 2158945, 1614162])

In [13]:
columns.values

array(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins',
       'Maximal consecutive profit', 'Sharpe Ratio', 'Trading activity',
       'Max deposit load', 'Latest trade', 'Trades per week',
       'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff',
       'Average Profit', 'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown',
       'Maximal balance drawdown', 'Relative drawdown by balance',
       'Relative drawdown by equity', 'Months', 'Average By Month',
       'Std By Month', 'Profit Trades Pct', 'Loss Trades Pct',
       'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount',
       'Max

# Understanding data types

In [14]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')

In [15]:
signals.dtypes

Rating                                  object
Signals                                 object
Price                                    int64
Growth                                   int64
Subscribers                              int64
                                        ...   
Maximum consecutive losses amount      float64
Maximal consecutive loss orders          int64
Maximal balance drawdown pct           float64
Relative drawdown by balance amount    float64
Relative drawdown by equity amount     float64
Length: 56, dtype: object

In [16]:
signals.value_counts()

Rating  Signals          Price  Growth  Subscribers  Funds  Balance  Weeks  Drawdown  Trades  Profit Trades  Loss Trades  Best trade  Worst trade  Gross Profit  Gross Loss  Maximum consecutive wins  Maximal consecutive profit  Sharpe Ratio  Trading activity  Max deposit load  Latest trade  Trades per week  Avg holding time  Recovery Factor  Long Trades  Short Trades  Profit Factor  Expected Payoff  Average Profit  Average Loss  Maximum consecutive losses  Maximal consecutive loss  Monthly growth  Annual Forecast  Algo trading  Absolute balance drawdown  Maximal balance drawdown  Relative drawdown by balance  Relative drawdown by equity  Months  Average By Month  Std By Month  Profit Trades Pct  Loss Trades Pct  Gross Profit Pct  Gross Loss Pips  Maximum consecutive wins amount  Maximal consecutive profit orders  Long Trades Pct  Short Trades Pct  Maximum consecutive losses amount  Maximal consecutive loss orders  Maximal balance drawdown pct  Relative drawdown by balance amount  Relati

# Selecting a single column of data as a Series

![dataframe anatomy](../images/ch01_series_anatomy.png)

In [17]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')

In [18]:
signals['Growth']

Signal
1371953      3
2020078    823
2105300      1
2049959    692
2145152    677
          ... 
2211292     65
2207077     70
1566301     65
2158945     66
1614162     70
Name: Growth, Length: 4313, dtype: int64

In [19]:
signals['Growth'].dtypes

dtype('int64')

In [20]:
type(signals['Growth'])

pandas.core.series.Series

## There's more

In [21]:
signal_growth = signals['Growth']
signal_growth.name

'Growth'

In [22]:
signal_growth.to_frame().head()

Unnamed: 0_level_0,Growth
Signal,Unnamed: 1_level_1
1371953,3
2020078,823
2105300,1
2049959,692
2145152,677


# Calling Series methods

## Getting ready...

In [23]:
s_attr_methods = set(dir(pd.Series))
len(s_attr_methods)

411

In [24]:
df_attr_methods = set(dir(pd.DataFrame))
len(df_attr_methods)

427

In [25]:
len(s_attr_methods & df_attr_methods)

357

## How to do it...

In [26]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
signals.columns

Index(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins', 'Maximal consecutive profit',
       'Sharpe Ratio', 'Trading activity', 'Max deposit load', 'Latest trade',
       'Trades per week', 'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff', 'Average Profit',
       'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown', 'Maximal balance drawdown',
       'Relative drawdown by balance', 'Relative drawdown by equity', 'Months',
       'Average By Month', 'Std By Month', 'Profit Trades Pct',
       'Loss Trades Pct', 'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount', 'Maximal consecutive prof

In [27]:
avg_by_month = signals['Average By Month']
monthly_growth = signals['Monthly growth']

In [28]:
avg_by_month.head()

Signal
1371953    15.487500
2020078    17.475333
2105300    14.050000
2049959     3.911636
2145152    23.438000
Name: Average By Month, dtype: float64

In [29]:
monthly_growth.head()

Signal
1371953    5.06
2020078    6.24
2105300    1.96
2049959    1.52
2145152    9.57
Name: Monthly growth, dtype: float64

In [30]:
# pd.set_option('max_rows', 8)
pd.set_option('display.max_rows', 10)
weeks = signals['Weeks']
weeks.value_counts()

Weeks
8      139
2      122
3      122
13     111
4      109
      ... 
345      1
352      1
164      1
198      1
460      1
Name: count, Length: 281, dtype: int64

In [31]:
months = signals['Months']
months.value_counts()

Months
3      440
4      438
2      425
1      375
6      276
      ... 
91       1
101      1
118      1
122      1
107      1
Name: count, Length: 91, dtype: int64

In [32]:
months.size

4313

In [33]:
months.shape

(4313,)

In [34]:
len(months)

4313

In [35]:
months.count()

4313

In [36]:
monthly_growth.count()

4308

In [37]:
monthly_growth.quantile() # 50th percentile by default

5.91

In [38]:
monthly_growth.quantile([0.13, 0.21, 0.34, 0.55, 0.89])

0.13     1.3100
0.21     2.1300
0.34     3.5100
0.55     6.9600
0.89    26.5699
Name: Monthly growth, dtype: float64

In [39]:
monthly_growth.min(), monthly_growth.max(), \
monthly_growth.mean(), monthly_growth.median(), \
monthly_growth.std(), monthly_growth.sum()

(0.0, 245.5, 11.298300835654597, 5.91, 15.62578284056739, 48673.08)

In [40]:
monthly_growth.describe()

count    4308.000000
mean       11.298301
std        15.625783
min         0.000000
25%         2.540000
50%         5.910000
75%        13.942500
max       245.500000
Name: Monthly growth, dtype: float64

In [41]:
avg_by_month.describe()

count    4313.000000
mean        6.929626
std        10.235251
min       -63.210000
25%         2.103750
50%         4.674750
75%         9.690000
max        89.022500
Name: Average By Month, dtype: float64

In [42]:
avg_by_month.quantile(.2)

1.564860606060606

In [43]:
temp = avg_by_month
temp.dropna(inplace=True)
temp.sort_values(ascending=True).tail(13)

Signal
2198096    67.170000
2220546    70.103333
2183180    70.122500
2187806    71.250000
2203298    72.150000
             ...    
2198627    81.043333
2177787    81.632500
2181709    81.700000
2216464    85.765000
2217753    89.022500
Name: Average By Month, Length: 13, dtype: float64

In [44]:
# avg_by_month.quantile()
# avg_by_month.quantile(.2)
avg_by_month.quantile([.1, .2, .3, .4, .5, ])

0.1   -0.271333
0.2    1.564861
0.3    2.553000
0.4    3.553429
0.5    4.674750
Name: Average By Month, dtype: float64

In [45]:
avg_by_month.quantile([.1, .2, .3, .4, .5, .6, .7, .8, .9])

0.1    -0.271333
0.2     1.564861
0.3     2.553000
0.4     3.553429
0.5     4.674750
0.6     6.106333
0.7     8.237733
0.8    11.537500
0.9    17.246400
Name: Average By Month, dtype: float64

In [46]:
monthly_growth.isnull()

Signal
1371953    False
2020078    False
2105300    False
2049959    False
2145152    False
           ...  
2211292    False
2207077    False
1566301    False
2158945    False
1614162    False
Name: Monthly growth, Length: 4313, dtype: bool

In [47]:
monthly_growth.isnull().value_counts()

Monthly growth
False    4308
True        5
Name: count, dtype: int64

In [48]:
monthly_growth.value_counts()

Monthly growth
0.00     24
0.71     11
2.02     10
2.61     10
0.61     10
         ..
13.67     1
19.84     1
21.46     1
50.21     1
19.22     1
Name: count, Length: 2039, dtype: int64

In [49]:
signals.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4313 entries, 1371953 to 1614162
Data columns (total 56 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Rating                               4313 non-null   object 
 1   Signals                              4313 non-null   object 
 2   Price                                4313 non-null   int64  
 3   Growth                               4313 non-null   int64  
 4   Subscribers                          4313 non-null   int64  
 5   Funds                                4313 non-null   int64  
 6   Balance                              4313 non-null   int64  
 7   Weeks                                4313 non-null   int64  
 8   Drawdown                             4313 non-null   int64  
 9   Trades                               4313 non-null   int64  
 10  Profit Trades                        4313 non-null   int64  
 11  Loss Trades               

In [50]:
signals.columns[signals.isna().any()].tolist()

['Trading activity', 'Profit Factor', 'Monthly growth', 'Annual Forecast']

In [51]:
monthly_growth_filled = monthly_growth.fillna(0)
monthly_growth_filled.count()

4313

In [52]:
monthly_growth_dropped = monthly_growth.dropna()
monthly_growth_dropped.size

4308

## There's more...

In [53]:
weeks

Signal
1371953    117
2020078     62
2105300     79
2049959    238
2145152     39
          ... 
2211292     48
2207077    460
1566301    107
2158945     46
1614162    134
Name: Weeks, Length: 4313, dtype: int64

In [54]:
weeks.value_counts(normalize=True)

Weeks
8      0.032228
2      0.028287
3      0.028287
13     0.025736
4      0.025272
         ...   
345    0.000232
352    0.000232
164    0.000232
198    0.000232
460    0.000232
Name: proportion, Length: 281, dtype: float64

In [55]:
annual_forecast = signals['Annual Forecast']

In [56]:
annual_forecast.hasnans

True

In [57]:
annual_forecast.notnull()

Signal
1371953    True
2020078    True
2105300    True
2049959    True
2145152    True
           ... 
2211292    True
2207077    True
1566301    True
2158945    True
1614162    True
Name: Annual Forecast, Length: 4313, dtype: bool

# Working with operators on a Series

In [58]:
pd.options.display.max_rows = 6

In [59]:
5 + 9    # plus operator example. Adds 5 and 9

14

In [60]:
4 ** 2   # exponentiation operator. Raises 4 to the second power

16

In [61]:
a = 10   # assignment operator.

In [62]:
5 <= 9   # less than or equal to operator

True

In [63]:
'abcde' + 'fg'    # plus operator for strings. C

'abcdefg'

In [64]:
not (5 <= 9)      # not is an operator that is a reserved keyword and reverse a boolean

False

In [65]:
7 in [1, 2, 6]    # in operator checks for membership of a list

False

In [66]:
set([1,2,3]) & set([2,3,4])

{2, 3}

In [67]:
# [1, 2, 3] - 3   # TypeError: unsupported operand type(s) for -: 'list' and 'int'

In [68]:
# a = set([1,2,3])     
# a[0]                 # the indexing operator does not work with sets | TypeError: 'set' object does not support indexing

## Getting ready...

In [69]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
signals.columns

Index(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins', 'Maximal consecutive profit',
       'Sharpe Ratio', 'Trading activity', 'Max deposit load', 'Latest trade',
       'Trades per week', 'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff', 'Average Profit',
       'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown', 'Maximal balance drawdown',
       'Relative drawdown by balance', 'Relative drawdown by equity', 'Months',
       'Average By Month', 'Std By Month', 'Profit Trades Pct',
       'Loss Trades Pct', 'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount', 'Maximal consecutive prof

In [70]:
EP = signals['Expected Payoff']
EP

Signal
1371953    9.07
2020078    0.60
2105300    3.50
           ... 
1566301    1.40
2158945    0.10
1614162    0.63
Name: Expected Payoff, Length: 4313, dtype: float64

In [71]:
EP + 1

Signal
1371953    10.07
2020078     1.60
2105300     4.50
           ...  
1566301     2.40
2158945     1.10
1614162     1.63
Name: Expected Payoff, Length: 4313, dtype: float64

In [72]:
EP * 2.5

Signal
1371953    22.675
2020078     1.500
2105300     8.750
            ...  
1566301     3.500
2158945     0.250
1614162     1.575
Name: Expected Payoff, Length: 4313, dtype: float64

In [73]:
EP // 7

Signal
1371953    1.0
2020078    0.0
2105300    0.0
          ... 
1566301    0.0
2158945    0.0
1614162    0.0
Name: Expected Payoff, Length: 4313, dtype: float64

In [74]:
EP > 7

Signal
1371953     True
2020078    False
2105300    False
           ...  
1566301    False
2158945    False
1614162    False
Name: Expected Payoff, Length: 4313, dtype: bool

In [75]:
PF = signals['Profit Factor']

In [76]:
PF

Signal
1371953    2.12
2020078    2.10
2105300    2.14
           ... 
1566301    1.35
2158945    1.02
1614162    1.15
Name: Profit Factor, Length: 4313, dtype: float64

In [77]:
PF == 2.10

Signal
1371953    False
2020078     True
2105300    False
           ...  
1566301    False
2158945    False
1614162    False
Name: Profit Factor, Length: 4313, dtype: bool

## There's more...

In [78]:
PF.add(1)              # imdb_score + 1

Signal
1371953    3.12
2020078    3.10
2105300    3.14
           ... 
1566301    2.35
2158945    2.02
1614162    2.15
Name: Profit Factor, Length: 4313, dtype: float64

In [79]:
PF.mul(2.5)            # imdb_score * 2.5

Signal
1371953    5.300
2020078    5.250
2105300    5.350
           ...  
1566301    3.375
2158945    2.550
1614162    2.875
Name: Profit Factor, Length: 4313, dtype: float64

In [80]:
PF.floordiv(7)         # imdb_score // 7

Signal
1371953    0.0
2020078    0.0
2105300    0.0
          ... 
1566301    0.0
2158945    0.0
1614162    0.0
Name: Profit Factor, Length: 4313, dtype: float64

In [81]:
PF.gt(7)               # imdb_score > 7

Signal
1371953    False
2020078    False
2105300    False
           ...  
1566301    False
2158945    False
1614162    False
Name: Profit Factor, Length: 4313, dtype: bool

In [82]:
PF.eq(2.1)   # director == 'James Cameron'

Signal
1371953    False
2020078     True
2105300    False
           ...  
1566301    False
2158945    False
1614162    False
Name: Profit Factor, Length: 4313, dtype: bool

In [83]:
PF.dropna(inplace=True)
PF.astype(int).mod(5)

Signal
1371953    2
2020078    2
2105300    2
          ..
1566301    1
2158945    1
1614162    1
Name: Profit Factor, Length: 4270, dtype: int64

In [84]:
a = type(1)

In [85]:
type(a)

type

In [86]:
a = type(PF)

In [87]:
a([1,2,3])

0    1
1    2
2    3
dtype: int64

# Chaining Series methods together

In [88]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,8,11.87,2505.44,3553.24
2020078,10,EURSpecial,30,823,...,4,6.09,53.30,342.76
2105300,6,Arnold,30,1,...,9,16.19,802.90,329.80
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,58,39.86,1539.94,8806.67
2158945,5693,2,30,66,...,11,92.19,1081.51,210.08
1614162,5720,345,30,70,...,1,59.00,15659.99,4213.67


In [89]:
growth = signals['Growth']
mbdp = signals['Maximal balance drawdown pct']

In [90]:
growth

Signal
1371953      3
2020078    823
2105300      1
          ... 
1566301     65
2158945     66
1614162     70
Name: Growth, Length: 4313, dtype: int64

In [91]:
growth.value_counts().head(10)

Growth
1     145
2     102
3      92
     ... 
11     76
8      73
9      72
Name: count, Length: 10, dtype: int64

In [92]:
mbdp.isnull().sum()

0

In [93]:
mbdp.dtype

dtype('float64')

In [94]:
mbdp.fillna(0)\
                .astype(int)\
                .head()

Signal
1371953    11
2020078     6
2105300    16
2049959     5
2145152     2
Name: Maximal balance drawdown pct, dtype: int64

## There's more...

In [95]:
mbdp.isnull().mean()

0.0

In [96]:
(mbdp.fillna(0)
                 .astype(int)
                 .head())

Signal
1371953    11
2020078     6
2105300    16
2049959     5
2145152     2
Name: Maximal balance drawdown pct, dtype: int64

# Making the index meaningful

In [97]:
signals = pd.read_csv('data/mql5_signals_mt4.csv')

In [98]:
signals.shape

(4313, 57)

In [99]:
signals2 = signals.set_index('Signal')
signals2

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,8,11.87,2505.44,3553.24
2020078,10,EURSpecial,30,823,...,4,6.09,53.30,342.76
2105300,6,Arnold,30,1,...,9,16.19,802.90,329.80
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,58,39.86,1539.94,8806.67
2158945,5693,2,30,66,...,11,92.19,1081.51,210.08
1614162,5720,345,30,70,...,1,59.00,15659.99,4213.67


In [100]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')

# There's more...

In [101]:
signals.reset_index()

Unnamed: 0,Signal,Rating,Signals,Price,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
0,1371953,1,Activity,30,...,8,11.87,2505.44,3553.24
1,2020078,10,EURSpecial,30,...,4,6.09,53.30,342.76
2,2105300,6,Arnold,30,...,9,16.19,802.90,329.80
...,...,...,...,...,...,...,...,...,...
4310,1566301,5688,High Risk,30,...,58,39.86,1539.94,8806.67
4311,2158945,5693,2,30,...,11,92.19,1081.51,210.08
4312,1614162,5720,345,30,...,1,59.00,15659.99,4213.67


# Renaming row and column names

In [102]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,8,11.87,2505.44,3553.24
2020078,10,EURSpecial,30,823,...,4,6.09,53.30,342.76
2105300,6,Arnold,30,1,...,9,16.19,802.90,329.80
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,58,39.86,1539.94,8806.67
2158945,5693,2,30,66,...,11,92.19,1081.51,210.08
1614162,5720,345,30,70,...,1,59.00,15659.99,4213.67


In [103]:
signals.columns

Index(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins', 'Maximal consecutive profit',
       'Sharpe Ratio', 'Trading activity', 'Max deposit load', 'Latest trade',
       'Trades per week', 'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff', 'Average Profit',
       'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown', 'Maximal balance drawdown',
       'Relative drawdown by balance', 'Relative drawdown by equity', 'Months',
       'Average By Month', 'Std By Month', 'Profit Trades Pct',
       'Loss Trades Pct', 'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount', 'Maximal consecutive prof

In [104]:
# idx_rename = {'Avatar':'Ratava', 'Spectre': 'Ertceps'} 
col_rename = {'Trading activity':'Activity', 'Subscribers': 'Customers'}

In [105]:
temp = signals.rename(columns=col_rename).head()    # signals.rename(columns=col_rename).head()
temp[['Activity', 'Customers']]


Unnamed: 0_level_0,Activity,Customers
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1
1371953,91.6,26
2020078,42.59,15
2105300,2.11,9
2049959,88.4,142
2145152,86.35,31


# There's more

In [106]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')
index = signals.index
columns = signals.columns

# # using tolist function
# index_list = index.tolist()
# column_list = columns.tolist()

# index_list[0] = 'Ratava'
# index_list[2] = 'Ertceps'
# column_list[1] = 'Director Name'
# column_list[2] = 'Critical Reviews'

In [107]:
# print(index_list[:5])

In [108]:
# print(column_list)

In [109]:
# signals.index = index_list
# signals.columns = column_list

In [110]:
signals.head()

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Maximal consecutive loss orders,Maximal balance drawdown pct,Relative drawdown by balance amount,Relative drawdown by equity amount
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,8,11.87,2505.44,3553.24
2020078,10,EURSpecial,30,823,...,4,6.09,53.3,342.76
2105300,6,Arnold,30,1,...,9,16.19,802.9,329.8
2049959,9,MAX NoLimit,50,692,...,7,5.81,753.57,2483.64
2145152,12,PabloFX Safe,30,677,...,1,2.93,14.1,500.97


# Creating and deleting columns

In [111]:
signals = pd.read_csv('data/mql5_signals_mt4.csv', index_col='Signal')

In [112]:
signals['Score'] = 0

In [113]:
signals.columns

Index(['Rating', 'Signals', 'Price', 'Growth', 'Subscribers', 'Funds',
       'Balance', 'Weeks', 'Drawdown', 'Trades', 'Profit Trades',
       'Loss Trades', 'Best trade', 'Worst trade', 'Gross Profit',
       'Gross Loss', 'Maximum consecutive wins', 'Maximal consecutive profit',
       'Sharpe Ratio', 'Trading activity', 'Max deposit load', 'Latest trade',
       'Trades per week', 'Avg holding time', 'Recovery Factor', 'Long Trades',
       'Short Trades', 'Profit Factor', 'Expected Payoff', 'Average Profit',
       'Average Loss', 'Maximum consecutive losses',
       'Maximal consecutive loss', 'Monthly growth', 'Annual Forecast',
       'Algo trading', 'Absolute balance drawdown', 'Maximal balance drawdown',
       'Relative drawdown by balance', 'Relative drawdown by equity', 'Months',
       'Average By Month', 'Std By Month', 'Profit Trades Pct',
       'Loss Trades Pct', 'Gross Profit Pct', 'Gross Loss Pips',
       'Maximum consecutive wins amount', 'Maximal consecutive prof

In [114]:
# create new actor_director_facebook_likes
signals['Positive_Pos'] = signals['Profit Trades'] >  signals['Loss Trades']
signals['Positive_Numbers'] = signals['Profit Trades'] -  signals['Loss Trades']

In [115]:
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Relative drawdown by equity amount,Score,Positive_Pos,Positive_Numbers
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,3553.24,0,True,1138
2020078,10,EURSpecial,30,823,...,342.76,0,True,760
2105300,6,Arnold,30,1,...,329.80,0,True,1150
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,8806.67,0,True,2262
2158945,5693,2,30,66,...,210.08,0,True,190
1614162,5720,345,30,70,...,4213.67,0,True,3329


In [116]:
signals['Positive_Pos'].isnull()

Signal
1371953    False
2020078    False
2105300    False
           ...  
1566301    False
2158945    False
1614162    False
Name: Positive_Pos, Length: 4313, dtype: bool

In [117]:
signals['Positive_Pos'].isnull().sum()

0

In [118]:
signals[['Maximum consecutive wins', 'Maximum consecutive losses']]

Unnamed: 0_level_0,Maximum consecutive wins,Maximum consecutive losses
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1
1371953,61,11
2020078,28,6
2105300,25,10
...,...,...
1566301,52,58
2158945,19,12
1614162,39,35


In [119]:
# create new "is_cast_likes_more"
signals['is_consecutive_good'] = (signals['Maximum consecutive wins'] >= signals['Maximum consecutive losses'])

In [120]:
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Score,Positive_Pos,Positive_Numbers,is_consecutive_good
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,0,True,1138,True
2020078,10,EURSpecial,30,823,...,0,True,760,True
2105300,6,Arnold,30,1,...,0,True,1150,True
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,0,True,2262,False
2158945,5693,2,30,66,...,0,True,190,True
1614162,5720,345,30,70,...,0,True,3329,True


In [121]:
signals['is_consecutive_good'].value_counts()

is_consecutive_good
True     3903
False     410
Name: count, dtype: int64

In [122]:
signals['is_consecutive_good'].all()

False

In [123]:
signals = signals.drop('Positive_Numbers', axis='columns')
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Relative drawdown by equity amount,Score,Positive_Pos,is_consecutive_good
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,3553.24,0,True,True
2020078,10,EURSpecial,30,823,...,342.76,0,True,True
2105300,6,Arnold,30,1,...,329.80,0,True,True
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,8806.67,0,True,False
2158945,5693,2,30,66,...,210.08,0,True,True
1614162,5720,345,30,70,...,4213.67,0,True,True


## There's more...

In [124]:
signals

Unnamed: 0_level_0,Rating,Signals,Price,Growth,...,Relative drawdown by equity amount,Score,Positive_Pos,is_consecutive_good
Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371953,1,Activity,30,3,...,3553.24,0,True,True
2020078,10,EURSpecial,30,823,...,342.76,0,True,True
2105300,6,Arnold,30,1,...,329.80,0,True,True
...,...,...,...,...,...,...,...,...,...
1566301,5688,High Risk,30,65,...,8806.67,0,True,False
2158945,5693,2,30,66,...,210.08,0,True,True
1614162,5720,345,30,70,...,4213.67,0,True,True


In [125]:
signals.iloc(1)

<pandas.core.indexing._iLocIndexer at 0x7f46f8614d10>