Skip to content

Commit

Permalink
Cositas
Browse files Browse the repository at this point in the history
  • Loading branch information
lucianolorenti committed Aug 18, 2022
2 parents 877885d + 4e78cb5 commit 8b8c613
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.0.0
current_version = 2.0.6
commit = True
tag = True

Expand Down
19 changes: 19 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
- name: Set up Python ${ matrix.python-version }}
uses: actions/setup-python@v3
with:
<<<<<<< HEAD
python-version: "3.10"
- name: FragileTech/bump-version
uses: FragileTech/bump-version@main
Expand All @@ -26,6 +27,24 @@ jobs:
commit_email: lucianolorenti@gmail.com
login: lucianolorenti@gmail.com
token: "${{ secrets.TOKEN_GITHUB }}"
=======
python-version: "3.10"
- name: Bump version
run: |
git config --global user.name "Bump bot"
git config --global user.email "lucianolorenti@gmail.com"
git config --global pull.rebase false
pip install bump2version
git remote add remote https://lucianolorenti:${{ secrets.TOKEN_GITHUB }}@github.com/$GITHUB_REPOSITORY
git pull --no-edit remote main
bump2version --tag --commit --allow-dirty --commit-args="-a" patch
- name: Push changes
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
branch: main
tags: true
>>>>>>> 4e78cb51c368737a91101483a4120e737525b7d6
- name: Install pypa/build
run: >-
python -m
Expand Down
2 changes: 1 addition & 1 deletion ceruleo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
CACHE_PATH.mkdir(parents=True, exist_ok=True)


__version__ = "2.0.0"
__version__ = "2.0.6"
8 changes: 5 additions & 3 deletions ceruleo/dataset/analysis/numerical_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ def mutual_information(x:np.ndarray, y:np.ndarray):
"monotonicity": lambda x,y:monotonicity(x),
"number_of_unique_elements": lambda x,y:n_unique(x),
'mutual_information': mutual_information,
'null': lambda x, y: null(x)
'null': lambda x, y: null(x),
'entropy': lambda x, y: entropy(x)
}


Expand Down Expand Up @@ -164,7 +165,7 @@ def analysis_single_time_series(
data = defaultdict(lambda: defaultdict(list))
if len(what_to_compute) == 0:
what_to_compute = list(sorted(metrics.keys()))
for column_index in range(X.shape[1]):
for column_index in range(len(column_names)):
column_name = column_names[column_index]
for what in what_to_compute:
x_ts = np.squeeze(X.loc[:, column_name].values)
Expand Down Expand Up @@ -209,6 +210,7 @@ def analysis(
- number_of_unique_elements
- mutual_information
- null
- entropy
Returns:
Expand All @@ -225,7 +227,7 @@ def analysis(
if isinstance(dataset, TransformedDataset):
column_names = dataset.transformer.column_names
else:
column_names = dataset[0].columns
column_names = dataset.numeric_features()
for X, y in iterate_over_features_and_target(dataset):
y = np.squeeze(y)
data = analysis_single_time_series(
Expand Down
62 changes: 39 additions & 23 deletions ceruleo/graphics/analysis.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,60 @@
from typing import List, Optional
import matplotlib.pyplot as plt

from temporis.dataset.ts_dataset import AbstractTimeSeriesDataset


import matplotlib
import matplotlib.pyplot as plt
from ceruleo.dataset.ts_dataset import AbstractTimeSeriesDataset


def correlation_analysis(
    dataset: AbstractTimeSeriesDataset,
    corr_threshold: float = 0,
    features: Optional[List[str]] = None,
    ax: Optional[matplotlib.axes.Axes] = None,
    **kwargs,
):
    """Plot the correlated features in a dataset

    Each pair of features whose absolute mean correlation is above
    corr_threshold is drawn as a horizontal bar (mean correlation) with
    an error bar (std of the correlation).

    Parameters:

        dataset: The dataset
        corr_threshold: Minimum threshold to consider that the correlation is high
        features: List of features. "relative_time" is always excluded
        ax: The axis where to draw. If None a new one is created
        **kwargs: Forwarded to plt.subplots when ax is None

    Returns:

        ax: the axis
    """
    if features is not None:
        # Drop duplicates keeping the caller's order, and skip relative_time
        features = [f for f in dict.fromkeys(features) if f != "relative_time"]

    # NOTE(review): in the visible module the name `correlation_analysis`
    # is this plotting function itself, so this call recurses instead of
    # computing the correlation table. Presumably the dataset-level
    # correlation_analysis must be imported under another name (or this
    # function renamed) -- TODO confirm and fix at module level.
    df = correlation_analysis(dataset, corr_threshold, features=features)

    # Chain the operations instead of mutating a filtered slice in place
    high_corr = (
        df[df["Abs mean correlation"] > corr_threshold]
        .reset_index()
        .sort_values(by="Mean Correlation", ascending=True)
    )

    if ax is None:
        _, ax = plt.subplots(**kwargs)

    labels = []
    for position, (_, row) in enumerate(high_corr.iterrows()):
        f1 = row["Feature 1"]
        f2 = row["Feature 2"]
        label = f"{f1}\n{f2}"
        ax.barh(
            y=position,
            width=row["Mean Correlation"],
            label=label,
            xerr=row["Std Correlation"],
            color="#7878FF",
        )
        labels.append(label)

    # Reference lines marking a very strong positive / negative correlation
    ax.axvline(x=0.90, linestyle="--")
    ax.axvline(x=-0.90, linestyle="--")

    ax.set_yticks(list(range(len(labels))))
    ax.set_yticklabels(labels)

    ax.set_xticks([-1, -0.90, -0.5, 0, 0.5, 0.90, 1])
    ax.set_xlabel("Correlation")
    return ax
71 changes: 55 additions & 16 deletions ceruleo/models/keras/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,14 @@

import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras import Sequential
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (
Activation,
Add,
BatchNormalization,
Conv1D,
Conv2D,
Dense,
Dropout,
Flatten,
Lambda,
Activation,
GlobalAveragePooling2D,
Permute,
)
from tensorflow.keras import regularizers
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D,
Dense, Flatten, Lambda, Layer, Permute,
Reshape)
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras.layers.pooling import GlobalAveragePooling1D, MaxPool1D
from tensorflow.python.keras.layers.pooling import GlobalAveragePooling1D


def ExpandDimension(dim: int = -1):
Expand Down Expand Up @@ -185,3 +175,52 @@ def call(self, inputs):
residual = tf.keras.backend.sign(residual) * n_sub
residual = RemoveDimension(3)(residual)
return residual + inputs




class ZeroWeights(tf.keras.constraints.Constraint):
    """Constraint that sets to zero the weights with a small magnitude

    Every weight whose absolute value is not greater than l1 is
    replaced by zero; the remaining weights are left untouched.

    Parameters:

        l1: Magnitude threshold below which (inclusive) a weight is zeroed
    """

    def __init__(self, l1: float):
        self.l1 = l1

    def __call__(self, w):
        # Cast the mask to the dtype of the weights instead of a hard-coded
        # float32, so the product is also valid for non-float32 variables
        keep_mask = tf.cast(tf.abs(w) > self.l1, w.dtype)
        return tf.math.multiply(w, keep_mask)

    def get_config(self):
        return {'l1': self.l1}


class LASSOLayer(Layer):
    """Element-wise scaling layer with L1-regularized weights

    The input is flattened (all the axes except the batch one), each
    element is multiplied by its own trainable weight, and the result is
    reshaped back to the original input shape. The weights carry an L1
    regularizer and a ZeroWeights constraint, both configured with l1.
    The count of non-zero weights is reported as the metric
    "Number of features".

    Parameters:

        l1: L1 regularization factor, also used as the ZeroWeights threshold
        **kwargs: Standard keras Layer arguments (name, dtype, ...)
    """

    def __init__(self, l1: float, **kwargs):
        super().__init__(**kwargs)
        self.l1 = l1
        self.kernel_regularizer = regularizers.L1(l1)

    def build(self, input_shape):
        # Shape of a single sample, i.e. without the batch axis
        sample_shape = tuple(input_shape[1:])
        n_weights = int(np.prod(sample_shape))

        self.w = self.add_weight(
            shape=(n_weights,),
            initializer="random_normal",
            trainable=True,
            regularizer=self.kernel_regularizer,
            constraint=ZeroWeights(self.l1),
        )

        self.input_reshape = Reshape((n_weights,))
        self.output_reshape = Reshape(sample_shape)

    def call(self, inputs):
        flat = self.input_reshape(inputs)
        scaled = tf.math.multiply(self.w, flat)

        # Track how many weights are still different from zero
        n_active = tf.math.reduce_sum(tf.cast(tf.abs(self.w) > 0, tf.float32))
        self.add_metric(n_active, name="Number of features")
        return self.output_reshape(scaled)

    def get_config(self):
        # Needed for serialization because __init__ takes the extra l1 argument
        config = super().get_config()
        config["l1"] = self.l1
        return config
13 changes: 0 additions & 13 deletions ceruleo/transformation/features/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,19 +1046,6 @@ class Interactions(TransformerStep):
"""Compute pairwise interactions between the features"""

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""Transform the given life computing the iteractions between features
Parameters
----------
X : pd.DataFrame
Input life
Returns
-------
pd.DataFrame
A new dataframe with the same index as the input
with n*(n-1) / 2 columns with the interactions between the features
"""
X_new = pd.DataFrame(index=X.index)
for c1, c2 in itertools.combinations(X.columns, 2):
X_new[f"{c1}_{c2}"] = X[c1] * X[c2]
Expand Down
38 changes: 13 additions & 25 deletions ceruleo/transformation/features/outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,20 @@


class IQROutlierRemover(TransformerStep):
"""
Impute values outside (Q1 - margin*IQR, Q2 + margin*IQR)
"""Remove values outside (Q1 - margin*IQR, Q2 + margin*IQR)
If clip is True the values will be clipped between the range,
otherwise the values are going to be replaced by inf and -inf
Parameters
----------
lower_quantile: float, default 0.25
Lower quantile threshold for the non-anomalous values
upper_quantile: float, default 0.75
Upper quantile threshold for the non-anomalous values
margin: float, default 1.5
How many times the IQR gets multiplied
proportion_to_sample:float, default 1.0
If you want to compute the quantiles in an smaller proportion of data
you can specify it
clip: bool
Wether to clip the values outside the range.
Parameters:
lower_quantile: Lower quantile threshold for the non-anomalous values
upper_quantile: Upper quantile threshold for the non-anomalous values
margin: How many times the IQR gets multiplied
proportion_to_sample: If you want to compute the quantiles in a smaller proportion of data
you can specify it
clip: Whether to clip the values outside the range.
"""

Expand Down Expand Up @@ -123,22 +116,17 @@ def description(self):


class BeyondQuartileOutlierRemover(TransformerStep):
"""
Impute values outside (Q1, Q3)
"""Remove values outside (Q1, Q3)
If clip is True the values will be clipped between the range,
otherwise the values are going to be replaced by inf and -inf
Parameters
----------
lower_quantile: float, default 0.25
Lower quantile threshold for the non-anomalous values
upper_quantile: float, default 0.75
Upper quantile threshold for the non-anomalous values
clip: bool
Wether to clip the values outside the range.
Parameters:
lower_quantile: Lower quantile threshold for the non-anomalous values
upper_quantile: Upper quantile threshold for the non-anomalous values
clip: Whether to clip the values outside the range.
"""

Expand Down
21 changes: 8 additions & 13 deletions ceruleo/transformation/features/rolling_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,19 @@ def apply_rolling_data(values : np.ndarray, function, window, step=1):
sections of length `window` at the data of column `col`. Append
the results to `data` as a new column with name `label`.
Parameters
----------
data : np.ndarray
1-D Time series of data
function : callable
Function to be called to calculate the rolling window
Parameters:
values: 1-D Time series of data
function: Function to be called to calculate the rolling window
analysis, the function must receive as input an array or
pandas series. Its output must be either a number or a pandas
series
window : int
length of the window to perform the analysis
step : int
step to take between two consecutive windows
window: length of the window to perform the analysis
step: step to take between two consecutive windows
Returns
Returns:
-------
data : np.ndarray
Columns generated by the function applied
data: Columns generated by the function applied
"""

Expand Down
Loading

0 comments on commit 8b8c613

Please sign in to comment.