In [None]:
#1. Scenario: You are building a microservices-based application using Docker. Design a Docker Compose file that sets up three containers: a web server container, a database container, and a cache container. Ensure that the containers can communicate with each other properly.
#2. Scenario: You want to scale your Docker containers dynamically based on the incoming traffic. Write a Python script that utilizes Docker SDK to monitor the CPU usage of a container and automatically scales the number of replicas based on a threshold.
#3. Scenario: You have a Docker image stored on a private registry. Develop a script in Bash that authenticates with the registry, pulls the latest version of the image, and runs a container based on that image.


In [None]:
#Here's an example of a Docker Compose file that sets up three containers: a web server container, a database container, and a cache container. 

In [None]:
# Compose stack with three services (web server, database, cache) attached to
# one shared network. Containers on the same Compose network can reach each
# other by service name (for example `db` and `cache` from `web`).
version: '3'
services:
  web:
    image: your-web-image
    ports:
      # HOST:CONTAINER is quoted so YAML cannot misread it as a base-60 number.
      - "80:80"
    depends_on:
      # Start-order only: this does not wait for db/cache to be ready.
      - db
      - cache
    networks:
      - mynetwork

  db:
    image: your-db-image
    networks:
      - mynetwork

  cache:
    image: your-cache-image
    networks:
      - mynetwork

networks:
  # Declared with default settings; all three services join it.
  mynetwork:


In [None]:
#Here's an example Python script that uses the Docker SDK to monitor the CPU usage of a container and scales the number of replicas based on a threshold. 

In [None]:
import docker
import time

# Module-level Docker client built from the environment; monitor_cpu uses it
# to look up the container to watch.
client = docker.from_env()

def get_cpu_usage(container):
    """Return the container's CPU utilisation as a percentage.

    The raw ``total_usage`` field is a cumulative counter of CPU time, so it
    cannot be compared against a percentage threshold. This takes a single
    stats sample and derives the percentage from the difference between the
    current (``cpu_stats``) and previous (``precpu_stats``) readings, scaled
    by the number of CPUs.

    Args:
        container: Object exposing ``stats(stream=False)`` that returns a
            Docker stats dictionary.

    Returns:
        float: CPU usage in percent (may exceed 100 on multi-CPU hosts), or
        ``0.0`` when the sample does not contain a usable delta.
    """
    stats = container.stats(stream=False)

    cpu_stats = stats.get('cpu_stats') or {}
    precpu_stats = stats.get('precpu_stats') or {}
    cpu_usage = cpu_stats.get('cpu_usage') or {}
    precpu_usage = precpu_stats.get('cpu_usage') or {}

    cpu_delta = cpu_usage.get('total_usage', 0) - precpu_usage.get('total_usage', 0)
    system_delta = (
        cpu_stats.get('system_cpu_usage', 0) - precpu_stats.get('system_cpu_usage', 0)
    )

    # No elapsed system time (or counters went backwards): nothing to report.
    if cpu_delta <= 0 or system_delta <= 0:
        return 0.0

    # Prefer the reported CPU count; fall back to the per-CPU list length.
    online_cpus = (
        cpu_stats.get('online_cpus')
        or len(cpu_usage.get('percpu_usage') or [])
        or 1
    )

    return (cpu_delta / system_delta) * online_cpus * 100.0

def scale_replicas(service, replicas):
    """Ask ``service`` to run ``replicas`` instances.

    Thin wrapper around the service object's ``scale`` method; whatever that
    method returns is discarded.
    """
    desired_count = replicas
    service.scale(replicas=desired_count)

def monitor_cpu(container_name, threshold, scale_up_replicas, scale_down_replicas,
                service_name=None):
    """Poll a container's CPU reading and rescale a Swarm service accordingly.

    Runs forever, sampling every 5 seconds. When the reading is above
    ``threshold`` the service is scaled to ``scale_up_replicas``; when it is
    below ``threshold`` and the service currently has more than one replica,
    it is scaled to ``scale_down_replicas``.

    Args:
        container_name: Name or ID of the container whose CPU is sampled.
        threshold: Value compared against ``get_cpu_usage``'s return value
            (same unit as that function returns).
        scale_up_replicas: Replica count to apply when above the threshold.
        scale_down_replicas: Replica count to apply when below the threshold.
        service_name: Name or ID of the service to scale. Defaults to
            ``container_name`` so existing calls keep working.
    """
    container = client.containers.get(container_name)
    # Replica counts and scale() belong to Swarm services, not containers, so
    # the service object is resolved once and used for all scaling calls.
    service = client.services.get(service_name or container_name)

    while True:
        cpu_usage = get_cpu_usage(container)

        # Refresh cached attributes so the replica count reflects prior scaling.
        service.reload()
        current_replicas = service.attrs['Spec']['Mode']['Replicated']['Replicas']

        if cpu_usage > threshold:
            scale_replicas(service, scale_up_replicas)
            print(f'Scaled up replicas of {container_name}')
        elif cpu_usage < threshold and current_replicas > 1:
            scale_replicas(service, scale_down_replicas)
            print(f'Scaled down replicas of {container_name}')

        time.sleep(5)

# Example usage
# Note: monitor_cpu loops with `while True`, so this call blocks the cell
# until the kernel is interrupted.
monitor_cpu('my-container', 80, 3, 1)


In [None]:
#Here's an example Bash script that authenticates with a private Docker registry, pulls the latest version of an image, and runs a container based on that image.

In [None]:
#!/bin/bash
#
# Log in to a private Docker registry, pull an image, and start a detached
# container from it.

# Abort on the first failing command, on unset variables, and on pipeline
# errors, so a failed login or pull never falls through to `docker run`.
set -euo pipefail

DOCKER_REGISTRY_URL="your-registry-url"
DOCKER_IMAGE_NAME="your-image-name"
DOCKER_IMAGE_TAG="latest"

# Full image reference, built once and quoted wherever it is used.
IMAGE_REF="${DOCKER_REGISTRY_URL}/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG}"

# Authenticate with the private registry
docker login "$DOCKER_REGISTRY_URL"

# Pull the latest version of the image
docker pull "$IMAGE_REF"

# Run a container based on the image
docker run -d "$IMAGE_REF"


In [None]:
#1. Scenario: You have a data pipeline that requires executing a shell command as part of a task. Create an Airflow DAG that includes a BashOperator to execute a specific shell command.
#2. Scenario: You want to create dynamic tasks in Airflow based on a list of inputs. Design an Airflow DAG that generates tasks dynamically using PythonOperator, where each task processes an element from the input list.
#3. Scenario: You need to set up a complex task dependency in Airflow, where Task B should start only if Task A has successfully completed. Implement this dependency using the "TriggerDagRunOperator" in Airflow.


In [None]:
#Here's an example of an Airflow DAG that includes a BashOperator to execute a shell command as part of a task:

In [None]:
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime

# Defaults applied to every task in the DAG.
default_args = dict(start_date=datetime(2023, 7, 15))

# DAG with no schedule (schedule_interval=None) holding a single
# BashOperator task that runs one shell command.
with DAG(
    dag_id='shell_command_dag',
    default_args=default_args,
    schedule_interval=None,
) as dag:
    task1 = BashOperator(
        task_id='execute_shell_command',
        bash_command='your_shell_command_here',
    )


In [None]:
#Here's an example of an Airflow DAG that generates tasks dynamically using PythonOperator, where each task processes an element from the input list:
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime

# Defaults passed to the DAG below via `default_args`.
default_args = {
    'start_date': datetime(2023, 7, 15)
}

def process_element(element):
    """Task callable: report which element is being processed.

    Prints a single line containing ``element`` and returns ``None``.
    """
    message = f'Processing element: {element}'
    print(message)

# DAG with no schedule whose tasks are generated from `input_list`.
with DAG(
    dag_id='dynamic_task_dag',
    default_args=default_args,
    schedule_interval=None,
) as dag:
    input_list = [1, 2, 3, 4, 5]

    # One PythonOperator per input value; the value is embedded in the
    # task_id and passed to process_element as its only positional argument.
    for element in input_list:
        task = PythonOperator(
            task_id=f'process_element_{element}',
            python_callable=process_element,
            op_args=[element],
        )


In [None]:
#To set up a task dependency in Airflow where Task B starts only if Task A has completed successfully, chain the tasks (task_a >> task_b) and give the "TriggerDagRunOperator" the 'all_success' trigger rule, which is also Airflow's default.
from airflow import DAG
from airflow.operators.dagrun_operator import TriggerDagRunOperator
from airflow.operators.dummy_operator import DummyOperator
from datetime import datetime

# Defaults applied to every task in the DAG.
default_args = {
    'start_date': datetime(2023, 7, 15)
}

dag = DAG('complex_dependency_dag', default_args=default_args, schedule_interval=None)

# Task A: placeholder upstream task.
task_a = DummyOperator(
    task_id='task_a',
    dag=dag
)

# Task B: triggers another DAG once Task A has succeeded.
# The target must be a different DAG. Pointing trigger_dag_id at this DAG's
# own id would make every run start another run of itself, without end.
# Replace the placeholder with the id of the DAG to trigger.
task_b = TriggerDagRunOperator(
    task_id='task_b',
    trigger_dag_id='your_target_dag_id_here',
    dag=dag,
    trigger_rule='all_success'
)

# Task B is downstream of Task A.
task_a >> task_b


In [None]:
#1. Scenario: You want to import data from an Oracle database into Hadoop using Sqoop, but you only need to import specific columns from a specific table. Write a Sqoop command that performs the import, including the necessary arguments for column selection and table mapping.
#2. Scenario: You have a requirement to perform an incremental import of data from a MySQL database into Hadoop using Sqoop. Design a Sqoop command that imports only the new or updated records since the last import.
#3. Scenario: You need to export data from Hadoop to a Microsoft SQL Server database using Sqoop. Develop a Sqoop command that exports the data, considering factors like database connection details, table mapping, and appropriate data types.



In [None]:
#Here's an example of a Sqoop command that imports specific columns from a specific table in an Oracle database into Hadoop:
# -P prompts for the password on the console instead of passing it as a
# command-line argument, where other users could read it from the process list.
sqoop import \
  --connect jdbc:oracle:thin:@<oracle_host>:<oracle_port>:<oracle_sid> \
  --username <username> \
  -P \
  --table <table_name> \
  --columns "<column1>,<column2>,<column3>" \
  --target-dir <target_directory>
#To perform an incremental import of data from a MySQL database into Hadoop using Sqoop, you can use the --incremental and --last-value options.
# `lastmodified` mode picks up rows that are new OR updated since --last-value,
# based on a timestamp check column; `append` mode would only see new rows.
# --merge-key lets Sqoop reconcile updated rows with those already in the
# target directory. -P prompts for the password instead of exposing it on the
# command line.
sqoop import \
  --connect jdbc:mysql://<mysql_host>:<mysql_port>/<database> \
  --username <username> \
  -P \
  --table <table_name> \
  --incremental lastmodified \
  --check-column <timestamp_column> \
  --last-value <last_imported_value> \
  --merge-key <primary_key_column> \
  --target-dir <target_directory>
#To export data from Hadoop to a Microsoft SQL Server database using Sqoop, you can use the export command.
# Credentials are supplied with --username and -P (console prompt) rather than
# embedded in the JDBC URL, matching the import commands and keeping the
# password out of the command line.
sqoop export \
  --connect "jdbc:sqlserver://<sql_server_host>:<sql_server_port>;database=<database>" \
  --username <username> \
  -P \
  --table <table_name> \
  --export-dir <export_directory> \
  --input-fields-terminated-by ',' \
  --input-lines-terminated-by '\n'
