### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [None]:
from Preprocessing.time_series_preprocessing import get_dengue_dataset
import matplotlib.pyplot as plt
from epiweeks import Week
import datetime

In [None]:
# Load the dengue label table for the municipality of Neiva (no embeddings requested).
labels = get_dengue_dataset(labels_path='../Tabular_data/dengue_tabular.csv', embeddings_path=None, municipality='Neiva')

# Move the index into a regular column and call it 'Date'.
labels = labels.reset_index()
labels.rename(columns={'index': 'Date'}, inplace=True)

# Keep rows whose Date code lies strictly between 201552 and 201901
# (presumably YYYYWW epidemiological-week codes - confirm against the dataset).
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not write into a slice (avoids SettingWithCopyWarning).
labels = labels[(labels['Date'] > 201552) & (labels['Date'] < 201901)].copy()

# Replace each week code with the end date of that epidemiological week.
labels['Date'] = labels['Date'].apply(lambda x: Week.fromstring(str(x)).enddate())
labels

In [None]:
# Line chart of the label series against its date column.
plt.plot(labels['Date'], labels['Labels'])

# Chart title.
plt.title('Classes by Date')

# Tilt the x-axis tick labels by 30 degrees and right-align them
# so the date strings do not overlap.
plt.xticks(ha='right', rotation=30)

# Axis captions.
plt.xlabel('Date')
plt.ylabel('Classes')

In [None]:
from scipy.stats import ttest_ind_from_stats

# Summary statistics (mean, standard deviation, sample size) of the two groups.
mean1, std1, n1 = 115.853, 8.798, 5
mean2, std2, n2 = 100.988, 0.795, 5

# Independent two-sample t-test computed from the summary statistics alone.
# NOTE(review): this relies on SciPy's default equal_var=True (pooled variance)
# although the two standard deviations differ by roughly 11x - confirm whether
# Welch's test (equal_var=False) was intended.
tstat, pvalue = ttest_ind_from_stats(
    mean1=mean1, std1=std1, nobs1=n1,
    mean2=mean2, std2=std2, nobs2=n2,
)
pvalue

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Attention, GlobalAveragePooling1D
from tensorflow.keras.models import Model

# Number of features per time step.
n_features = 10

# Variable-length input sequences: (batch_size, sequence_length, n_features).
input_seq = Input(shape=(None, n_features))

# return_sequences=True keeps one 64-unit vector per time step:
# (batch_size, sequence_length, 64).
lstm_layer = LSTM(64, return_sequences=True)
lstm_output = lstm_layer(input_seq)

# Self-attention: the LSTM sequence is passed as both query and value, so the
# result keeps the sequence axis: (batch_size, sequence_length, 64).
attention_layer = Attention()
attention_output = attention_layer([lstm_output, lstm_output])

# Average over the time axis to get one vector per sample: (batch_size, 64).
pooling_layer = GlobalAveragePooling1D()
pool_output = pooling_layer(attention_output)

# Single sigmoid unit: (batch_size, 1).
output_layer = Dense(1, activation='sigmoid')
output = output_layer(pool_output)

# Assemble the functional model from input to output.
model = Model(inputs=input_seq, outputs=output)


In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Variable-length input sequences: (batch_size, sequence_length, n_features).
input_seq = Input(shape=(None, n_features))

# Self-attention block: multi-head attention with the input used as both query
# and value, followed by dropout and layer normalisation. The sequence axis is
# preserved. No residual connection is added around the attention.
encoder_output = tf.keras.layers.MultiHeadAttention(num_heads=8, key_dim=64)(input_seq, input_seq)
encoder_output = Dropout(0.1)(encoder_output)
encoder_output = tf.keras.layers.LayerNormalization(epsilon=1e-6)(encoder_output)

# Per-time-step feed-forward network: 64 units, then 32 units.
# Shape after the second layer: (batch_size, sequence_length, 32).
ffn_output = Dense(64, activation='relu')(encoder_output)
ffn_output = Dense(32, activation='relu')(ffn_output)

# Average over the time axis: (batch_size, 32).
pool_output = GlobalAveragePooling1D()(ffn_output)

# Dropout on the pooled vector before the output layer.
ffn_output = Dropout(0.1)(pool_output)

# Single sigmoid unit: (batch_size, 1).
output = Dense(1, activation='sigmoid')(ffn_output)

# Assemble the functional model from input to output.
model = Model(inputs=input_seq, outputs=output)


In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

In [None]:
import os

import numpy as np

from skimage import io


In [None]:
def count_black(image):
    """Return True when every element of ``image`` is zero (an all-black image).

    Parameters
    ----------
    image : array-like
        Pixel data of any dimensionality, e.g. ``(H, W)`` grayscale or
        ``(H, W, C)`` colour.

    Returns
    -------
    bool
        True if all pixel values equal 0 (an empty array also yields True),
        otherwise False.
    """
    image = np.asarray(image)
    black_pixels = np.count_nonzero(image == 0)
    # image.size is the total element count whatever the number of axes, so
    # grayscale (2-D) images no longer fail on a missing third dimension.
    return bool(black_pixels == image.size)
    

In [None]:
# List the entries of the dataset root directory.
# NOTE(review): within this file `path` is first assigned in a later cell, so
# running the cells strictly top-to-bottom would raise NameError here - confirm
# the intended execution order.
os.listdir(path)

In [None]:
""" Read Dataset:"""
# Count the all-black images in every municipality folder of the dataset.
path = '../Dataset_10_best_cities/'

for municipality in os.listdir(path):
    municipality_path = os.path.join(path, municipality)
    # Stray files at the dataset root are not municipality folders; listing
    # them would raise NotADirectoryError, so skip them.
    if not os.path.isdir(municipality_path):
        continue

    count = 0
    for image_name in os.listdir(municipality_path):
        image_path = os.path.join(municipality_path, image_name)
        # Nested directories are not images.
        if os.path.isdir(image_path):
            continue
        image = io.imread(image_path)
        if count_black(image):
            count += 1

    print(f'The number of black images for {municipality} are: {count}')
        
        

In [None]:
""" Read Dataset:"""
# Count the all-black images in every municipality folder of the dataset.
path = '../FULL_COLOMBIA_v2/'

for municipality in os.listdir(path):
    municipality_path = os.path.join(path, municipality)
    # Stray files at the dataset root are not municipality folders; listing
    # them would raise NotADirectoryError, so skip them.
    if not os.path.isdir(municipality_path):
        continue

    count = 0
    for image_name in os.listdir(municipality_path):
        image_path = os.path.join(municipality_path, image_name)
        # Nested directories are not images.
        if os.path.isdir(image_path):
            continue
        image = io.imread(image_path)
        if count_black(image):
            count += 1

    print(f'The number of black images for {municipality} are: {count}')
        

In [5]:
#!pip install keras-cv
from keras_cv.models import ViTTiny16
import tensorflow as tf

In [12]:
# 224x224 RGB input tensor fed to the backbone.
inputs = tf.keras.layers.Input(shape=(224, 224, 3))

# Headless ViT-Tiny/16 backbone with ImageNet weights, no built-in rescaling.
# NOTE(review): the constructor is ViTTiny16 but the model is named
# "ViTTiny32" - confirm the name is intentional.
vit_options = dict(
    input_tensor=inputs,
    weights="imagenet",
    include_top=False,
    include_rescaling=False,
    pooling="token_pooling",
    activation=tf.keras.activations.gelu,
    name="ViTTiny32",
)
vit = ViTTiny16(**vit_options)

In [1]:
import os

# Level '2' filters out TensorFlow's native INFO and WARNING messages.
tf_min_log_level = '2'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = tf_min_log_level

In [2]:
from Models.Pretrained_DL_Models import get_backbone

# Backbone selection; the trailing comments list the alternatives tried.
backbone = 'ConvNeXtTiny' # 'ViT' # 'ConvNeXtTiny' # 'ConvNeXtSmall' # 'ConvNeXtBase' # 'ResNet50V2' # 'VGG16' # 'MobileNetV2'
weights = 'imagenet' # 'imagenet' # None # 'sentinel_vae' # 'sentinel_ae'
# The backbone is built frozen (the recorded summary reports 0 trainable
# parameters). The flag holds the value actually used and is passed to the
# call, so the setting and the call cannot disagree.
freeze = True
cnn_base = get_backbone(target_size=(224, 224, 3), backbone=backbone, freeze=freeze, weights=weights)
cnn_base

You do not have pycocotools installed, so KerasCV pycoco metrics are not available. Please run `pip install pycocotools`.
You do not have pyococotools installed, so the `PyCOCOCallback` API is not available.
You do not have Waymo Open Dataset installed, so KerasCV Waymo metrics are not available.


<keras.engine.sequential.Sequential at 0x7ef86c5b6d90>

In [3]:
# Print the layer-by-layer summary of the backbone returned by get_backbone.
cnn_base.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 convnext_tiny (Functional)  (None, 7, 7, 768)         27820128  
                                                                 
 global_average_pooling2d (G  (None, 768)              0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 27,820,128
Trainable params: 0
Non-trainable params: 27,820,128
_________________________________________________________________


In [4]:
from tensorflow.keras.layers import Dense
import tensorflow as tf


# Each sample is a sequence of 3 frames of 224x224 RGB images.
frames_shape = (3, 224, 224, 3)

# Apply the CNN backbone and a 1024-unit dense layer to every frame, then run
# an LSTM over the frame sequence, returning one 120-unit vector per frame.
model = tf.keras.Sequential([
    tf.keras.layers.TimeDistributed(cnn_base, input_shape=frames_shape),
    tf.keras.layers.TimeDistributed(Dense(1024)),
    tf.keras.layers.LSTM(120, dropout=0.1, return_sequences=True),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 3, 768)           27820128  
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 3, 1024)          787456    
 tributed)                                                       
                                                                 
 lstm (LSTM)                 (None, 3, 120)            549600    
                                                                 
Total params: 29,157,184
Trainable params: 1,337,056
Non-trainable params: 27,820,128
_________________________________________________________________
